Initial commit

utils.py (new file, 854 lines)

@@ -0,0 +1,854 @@
#!/usr/bin/env python3
"""
utils.py - Utility Functions for Data Processing and Technical Indicators

Utility functions for data processing, technical indicators, validation, and configuration management.
"""

import json
import logging
import os
import re
import shutil
import tempfile
from datetime import datetime, timezone
from decimal import Decimal, ROUND_HALF_UP, InvalidOperation as DecimalException
from typing import Dict, List, Optional, Any, Union

import numpy as np
import pandas as pd
import pandas_ta as ta
from dotenv import load_dotenv

# Load environment variables
load_dotenv('variables.env')


def setup_logging(log_level: str = None, log_file: str = None):
    """Set up logging configuration"""
    # Use environment variables if parameters are not provided
    if log_level is None:
        log_level = os.getenv('LOG_LEVEL', 'INFO')
    if log_file is None:
        log_file = os.getenv('LOG_FILE', 'crypto_collector.log')

    # Create the logs directory if it doesn't exist
    os.makedirs("logs", exist_ok=True)

    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"

    # Configure the root logger; fall back to INFO on an unknown level name
    logging.basicConfig(
        level=getattr(logging, log_level.upper(), logging.INFO),
        format=log_format,
        datefmt=date_format,
        handlers=[
            logging.FileHandler(f"logs/{log_file}"),
            logging.StreamHandler()
        ]
    )

    # Quiet noisy external libraries
    logging.getLogger("websockets").setLevel(logging.WARNING)
    logging.getLogger("asyncio").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("binance").setLevel(logging.WARNING)


def load_config(config_file: str = "config.conf") -> Dict[str, Any]:
    """Load configuration from JSON file"""
    logger = logging.getLogger(__name__)
    try:
        with open(config_file, 'r') as f:
            config = json.load(f)

        # Validate configuration structure
        validate_config(config)
        logger.debug(f"Successfully loaded config from {config_file}")
        return config

    except FileNotFoundError:
        logger.warning(f"Config file {config_file} not found, creating default")
        # Create a default configuration if the file doesn't exist
        default_config = create_default_config()
        save_config(default_config, config_file)
        return default_config

    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in configuration file: {e}")
        raise ValueError(f"Invalid JSON in configuration file: {e}")


def create_default_config() -> Dict[str, Any]:
    """Create default configuration"""
    return {
        "trading_pairs": [
            {"symbol": "BTCUSDT", "enabled": True, "priority": 1},
            {"symbol": "ETHUSDT", "enabled": True, "priority": 1},
            {"symbol": "BNBUSDT", "enabled": True, "priority": 2},
            {"symbol": "XRPUSDT", "enabled": True, "priority": 3},
            {"symbol": "SOLUSDT", "enabled": True, "priority": 2}
        ],
        "technical_indicators": {
            "enabled": ["sma", "ema", "rsi", "macd", "bb", "atr"],
            "periods": {
                "sma": [20, 50, 200],
                "ema": [12, 26],
                "rsi": [14],
                "macd": {"fast": 12, "slow": 26, "signal": 9},
                "bb": {"period": 20, "std": 2},
                "atr": [14],
                "stoch": {"k_period": 14, "d_period": 3},
                "adx": [14]
            },
            "calculation_intervals": ["1m", "5m", "15m", "1h", "4h", "1d"]
        },
        "collection": {
            "bulk_chunk_size": 1000,
            "websocket_reconnect_delay": 5,
            "tick_batch_size": 100,
            "candle_intervals": ["1m", "5m", "15m", "1h", "4h", "1d"],
            "max_retries": 3,
            "retry_delay": 1,
            "rate_limit_requests_per_minute": 2000,
            "concurrent_symbol_limit": 10
        },
        "database": {
            "batch_insert_size": 1000,
            "compression_after_days": 7,
            "retention_policy_days": 365,
            "vacuum_analyze_interval_hours": 24,
            "connection_pool": {
                "min_size": 10,
                "max_size": 50,
                "command_timeout": 60
            }
        },
        "ui": {
            "refresh_interval_seconds": 5,
            "max_chart_points": 1000,
            "default_timeframe": "1d",
            "theme": "dark",
            "enable_realtime_updates": True
        },
        "gap_filling": {
            "enable_auto_gap_filling": True,
            "auto_fill_schedule_hours": 24,
            "intervals_to_monitor": ["1m", "5m", "15m", "1h", "4h", "1d"],
            "max_gap_size_candles": 1000,
            "max_consecutive_empty_candles": 5,
            "averaging_lookback_candles": 10,
            "enable_intelligent_averaging": True,
            "max_fill_attempts": 3
        }
    }


def save_config(config: Dict[str, Any], config_file: str = "config.conf"):
    """Save configuration to JSON file using an atomic write"""
    logger = logging.getLogger(__name__)

    try:
        # Validate before saving
        validate_config(config)

        # Get the directory of the config file
        config_dir = os.path.dirname(config_file) or '.'

        # Create a temporary file in the same directory so the rename stays
        # on one filesystem and remains atomic
        temp_fd, temp_path = tempfile.mkstemp(
            dir=config_dir,
            prefix='.tmp_config_',
            suffix='.conf',
            text=True
        )

        try:
            # Write to the temporary file
            with os.fdopen(temp_fd, 'w') as f:
                json.dump(config, f, indent=2, sort_keys=False)
                f.flush()
                os.fsync(f.fileno())  # Force write to disk

            # Atomic rename
            shutil.move(temp_path, config_file)
            logger.info(f"Configuration saved successfully to {config_file}")

        except Exception:
            # Clean up the temp file on error
            try:
                os.unlink(temp_path)
            except OSError:
                pass
            raise

    except Exception as e:
        logger.error(f"Error saving config: {e}", exc_info=True)
        raise
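
# Usage sketch: a read-modify-write round trip. save_config() validates first
# and writes via temp file + rename, so a crash cannot leave a truncated file.
# (The 'ui' section shown here exists in the default config.)
#
#   cfg = load_config()          # creates config.conf with defaults if missing
#   cfg['ui']['theme'] = 'light'
#   save_config(cfg)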


def validate_config(config: Dict[str, Any]):
    """Validate configuration structure"""
    required_sections = ["trading_pairs", "technical_indicators", "collection", "database"]

    for section in required_sections:
        if section not in config:
            raise ValueError(f"Missing required configuration section: {section}")

    # Validate trading pairs
    if not isinstance(config["trading_pairs"], list):
        raise ValueError("trading_pairs must be a list")

    for pair in config["trading_pairs"]:
        if not isinstance(pair, dict) or "symbol" not in pair:
            raise ValueError("Invalid trading pair configuration")
        if not validate_symbol(pair["symbol"]):
            raise ValueError(f"Invalid symbol format: {pair['symbol']}")

        # Ensure required fields with defaults
        if "enabled" not in pair:
            pair["enabled"] = True
        if "priority" not in pair:
            pair["priority"] = 1

    # Validate technical indicators
    indicators_config = config["technical_indicators"]
    if "enabled" not in indicators_config or "periods" not in indicators_config:
        raise ValueError("Invalid technical indicators configuration")

    if not isinstance(indicators_config["enabled"], list):
        raise ValueError("technical_indicators.enabled must be a list")


def validate_symbol(symbol: str) -> bool:
    """Validate trading pair symbol format"""
    # Binance symbol format: base currency + quote currency (e.g., BTCUSDT)
    if not symbol or len(symbol) < 6:
        return False

    # Should be uppercase letters/numbers only
    if not re.match(r'^[A-Z0-9]+$', symbol):
        return False

    # Should end with a common quote currency
    quote_currencies = ['USDT', 'BUSD', 'BTC', 'ETH', 'BNB', 'USDC', 'TUSD', 'DAI']
    if not any(symbol.endswith(quote) for quote in quote_currencies):
        return False

    return True
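
# Illustrative results:
#
#   validate_symbol('BTCUSDT')   # -> True
#   validate_symbol('btcusdt')   # -> False (lowercase)
#   validate_symbol('BTCEUR')    # -> False (EUR is not a whitelisted quote)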


def reload_env_vars(env_file: str = 'variables.env'):
    """Reload environment variables from file"""
    # load_dotenv is already imported at module level
    load_dotenv(env_file, override=True)


def format_timestamp(timestamp: Union[int, float, str, datetime]) -> datetime:
    """Convert a timestamp to a timezone-aware (UTC) datetime object"""
    if isinstance(timestamp, datetime):
        # Ensure timezone awareness
        if timestamp.tzinfo is None:
            return timestamp.replace(tzinfo=timezone.utc)
        return timestamp

    if isinstance(timestamp, str):
        try:
            # Try parsing ISO format first
            return datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except ValueError:
            try:
                # Fall back to a numeric epoch timestamp
                timestamp = float(timestamp)
            except ValueError:
                raise ValueError(f"Invalid timestamp string format: {timestamp}")

    if isinstance(timestamp, (int, float)):
        # Handle both seconds and milliseconds timestamps
        if timestamp > 1e10:  # Milliseconds
            timestamp = timestamp / 1000
        return datetime.fromtimestamp(timestamp, tz=timezone.utc)

    raise ValueError(f"Invalid timestamp format: {type(timestamp)}")


def parse_kline_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """Parse Binance kline/candlestick data"""
    kline = data['k']
    return {
        'time': format_timestamp(kline['t']),
        'symbol': kline['s'],
        'exchange': 'binance',
        'interval': kline['i'],
        'open_price': Decimal(str(kline['o'])),
        'high_price': Decimal(str(kline['h'])),
        'low_price': Decimal(str(kline['l'])),
        'close_price': Decimal(str(kline['c'])),
        'volume': Decimal(str(kline['v'])),
        'quote_volume': Decimal(str(kline['q'])) if 'q' in kline else None,
        'trade_count': int(kline['n']) if 'n' in kline else None
    }


def parse_trade_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """Parse Binance trade data"""
    return {
        'time': format_timestamp(data['T']),
        'symbol': data['s'],
        'exchange': 'binance',
        'price': Decimal(str(data['p'])),
        'quantity': Decimal(str(data['q'])),
        'trade_id': int(data['t']),
        'is_buyer_maker': bool(data['m'])
    }
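
# Usage sketch with an illustrative trade payload (keys follow Binance's
# @trade stream; the values here are made up):
#
#   parse_trade_data({'s': 'BTCUSDT', 'p': '43250.10', 'q': '0.015',
#                     't': 123456789, 'T': 1700000000000, 'm': False})
#   # -> {'time': ..., 'price': Decimal('43250.10'), 'is_buyer_maker': False, ...}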


def calculate_technical_indicators(df: pd.DataFrame, indicators_config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Calculate technical indicators using pandas_ta

    Args:
        df: DataFrame with OHLCV data (index: time, columns: open, high, low, close, volume)
        indicators_config: Configuration for indicators to calculate

    Returns:
        List of dictionaries with indicator data
    """
    if len(df) < 50:  # Need enough data for most indicators
        return []

    # Create a copy and ensure proper data types
    df_ta = df.copy()

    # Rename columns to match pandas_ta expectations if needed
    column_mapping = {
        'open_price': 'open',
        'high_price': 'high',
        'low_price': 'low',
        'close_price': 'close'
    }

    for old_col, new_col in column_mapping.items():
        if old_col in df_ta.columns and new_col not in df_ta.columns:
            df_ta.rename(columns={old_col: new_col}, inplace=True)

    # Convert all price/volume columns to float64 so pandas_ta's numba-compiled
    # functions receive proper numeric types instead of Python objects
    required_columns = ['open', 'high', 'low', 'close', 'volume']
    for col in required_columns:
        if col in df_ta.columns:
            df_ta[col] = pd.to_numeric(df_ta[col], errors='coerce').astype(np.float64)

    # Remove any NaN values that may have been introduced
    df_ta = df_ta.dropna()

    if len(df_ta) < 50:  # Check again after cleaning
        return []

    indicators_data = []
    enabled_indicators = indicators_config.get('enabled', [])
    periods = indicators_config.get('periods', {})
    logger = logging.getLogger(__name__)

    try:
        for indicator in enabled_indicators:
            if indicator == 'sma':
                # Simple Moving Average
                for period in periods.get('sma', [20]):
                    try:
                        sma_values = ta.sma(df_ta['close'], length=period)
                        if sma_values is not None:
                            for idx, value in sma_values.dropna().items():
                                indicators_data.append({
                                    'time': idx,
                                    'indicator_name': f'sma_{period}',
                                    'indicator_value': round(float(value), 8),
                                    'metadata': json.dumps({'period': period})
                                })
                    except Exception as e:
                        logger.error(f"Error calculating SMA-{period}: {e}")

            elif indicator == 'ema':
                # Exponential Moving Average
                for period in periods.get('ema', [12, 26]):
                    try:
                        ema_values = ta.ema(df_ta['close'], length=period)
                        if ema_values is not None:
                            for idx, value in ema_values.dropna().items():
                                indicators_data.append({
                                    'time': idx,
                                    'indicator_name': f'ema_{period}',
                                    'indicator_value': round(float(value), 8),
                                    'metadata': json.dumps({'period': period})
                                })
                    except Exception as e:
                        logger.error(f"Error calculating EMA-{period}: {e}")

            elif indicator == 'rsi':
                # Relative Strength Index
                for period in periods.get('rsi', [14]):
                    try:
                        rsi_values = ta.rsi(df_ta['close'], length=period)
                        if rsi_values is not None:
                            for idx, value in rsi_values.dropna().items():
                                indicators_data.append({
                                    'time': idx,
                                    'indicator_name': f'rsi_{period}',
                                    'indicator_value': round(float(value), 8),
                                    'metadata': json.dumps({'period': period})
                                })
                    except Exception as e:
                        logger.error(f"Error calculating RSI-{period}: {e}")

            elif indicator == 'macd':
                # MACD
                macd_config = periods.get('macd', {'fast': 12, 'slow': 26, 'signal': 9})
                try:
                    macd_result = ta.macd(
                        df_ta['close'],
                        fast=macd_config['fast'],
                        slow=macd_config['slow'],
                        signal=macd_config['signal']
                    )

                    if macd_result is not None:
                        # pandas_ta names the columns MACD_f_s_sig (line),
                        # MACDs_f_s_sig (signal) and MACDh_f_s_sig (histogram)
                        suffix = f"{macd_config['fast']}_{macd_config['slow']}_{macd_config['signal']}"
                        macd_columns = [
                            (f"MACD_{suffix}", 'macd_line'),
                            (f"MACDs_{suffix}", 'macd_signal'),
                            (f"MACDh_{suffix}", 'macd_histogram')
                        ]
                        for col, name in macd_columns:
                            if col in macd_result.columns:
                                for idx, value in macd_result[col].dropna().items():
                                    indicators_data.append({
                                        'time': idx,
                                        'indicator_name': name,
                                        'indicator_value': round(float(value), 8),
                                        'metadata': json.dumps(macd_config)
                                    })
                except Exception as e:
                    logger.error(f"Error calculating MACD: {e}")

            elif indicator == 'bb':
                # Bollinger Bands
                bb_config = periods.get('bb', {'period': 20, 'std': 2})
                try:
                    bb_result = ta.bbands(
                        df_ta['close'],
                        length=bb_config['period'],
                        std=bb_config['std']
                    )

                    if bb_result is not None:
                        # Bands are prefixed BBU_/BBM_/BBL_ followed by the
                        # period (and std), e.g. BBU_20_2.0
                        for prefix, name in (('BBU', 'bb_upper'), ('BBM', 'bb_middle'), ('BBL', 'bb_lower')):
                            for col in bb_result.columns:
                                if col.startswith(f"{prefix}_{bb_config['period']}"):
                                    for idx, value in bb_result[col].dropna().items():
                                        indicators_data.append({
                                            'time': idx,
                                            'indicator_name': name,
                                            'indicator_value': round(float(value), 8),
                                            'metadata': json.dumps(bb_config)
                                        })
                                    break
                except Exception as e:
                    logger.error(f"Error calculating Bollinger Bands: {e}")

            elif indicator == 'atr':
                # Average True Range
                for period in periods.get('atr', [14]):
                    try:
                        atr_values = ta.atr(df_ta['high'], df_ta['low'], df_ta['close'], length=period)
                        if atr_values is not None:
                            for idx, value in atr_values.dropna().items():
                                indicators_data.append({
                                    'time': idx,
                                    'indicator_name': f'atr_{period}',
                                    'indicator_value': round(float(value), 8),
                                    'metadata': json.dumps({'period': period})
                                })
                    except Exception as e:
                        logger.error(f"Error calculating ATR-{period}: {e}")

            elif indicator == 'stoch':
                # Stochastic Oscillator
                stoch_config = periods.get('stoch', {'k_period': 14, 'd_period': 3})
                try:
                    stoch_result = ta.stoch(
                        df_ta['high'], df_ta['low'], df_ta['close'],
                        k=stoch_config['k_period'],
                        d=stoch_config['d_period']
                    )

                    if stoch_result is not None:
                        # %K and %D columns are named STOCHk_* and STOCHd_*
                        for key, name in (('STOCHk', 'stoch_k'), ('STOCHd', 'stoch_d')):
                            for col in stoch_result.columns:
                                if key in col:
                                    for idx, value in stoch_result[col].dropna().items():
                                        indicators_data.append({
                                            'time': idx,
                                            'indicator_name': name,
                                            'indicator_value': round(float(value), 8),
                                            'metadata': json.dumps(stoch_config)
                                        })
                                    break
                except Exception as e:
                    logger.error(f"Error calculating Stochastic: {e}")

            elif indicator == 'adx':
                # Average Directional Index
                for period in periods.get('adx', [14]):
                    try:
                        adx_result = ta.adx(df_ta['high'], df_ta['low'], df_ta['close'], length=period)
                        if adx_result is not None:
                            adx_col = f"ADX_{period}"
                            if adx_col in adx_result.columns:
                                for idx, value in adx_result[adx_col].dropna().items():
                                    indicators_data.append({
                                        'time': idx,
                                        'indicator_name': f'adx_{period}',
                                        'indicator_value': round(float(value), 8),
                                        'metadata': json.dumps({'period': period})
                                    })
                    except Exception as e:
                        logger.error(f"Error calculating ADX-{period}: {e}")

    except Exception as e:
        logger.error(f"Error calculating technical indicators: {e}", exc_info=True)

    return indicators_data
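
# Usage sketch, assuming a hypothetical load_candles() that returns at least 50
# rows of OHLCV indexed by time (either open/high/... or *_price column names):
#
#   df = load_candles('BTCUSDT', '1h')
#   rows = calculate_technical_indicators(df, {
#       'enabled': ['sma', 'rsi'],
#       'periods': {'sma': [20], 'rsi': [14]},
#   })
#   # -> [{'time': ..., 'indicator_name': 'sma_20', 'indicator_value': ...,
#   #      'metadata': '{"period": 20}'}, ...]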


def resample_ticks_to_ohlcv(ticks: List[Dict[str, Any]], interval: str) -> List[Dict[str, Any]]:
    """
    Resample tick data to OHLCV format

    Args:
        ticks: List of tick data dictionaries
        interval: Resampling interval as a pandas offset alias (e.g., '1min', '5min', '1H')

    Returns:
        List of OHLCV dictionaries
    """
    if not ticks:
        return []

    # Convert to DataFrame
    df = pd.DataFrame(ticks)
    df['time'] = pd.to_datetime(df['time'])
    df.set_index('time', inplace=True)

    # Convert price and quantity to float
    df['price'] = pd.to_numeric(df['price'], errors='coerce')
    df['quantity'] = pd.to_numeric(df['quantity'], errors='coerce')

    # Group by symbol and resample
    ohlcv_data = []
    for symbol in df['symbol'].unique():
        symbol_df = df[df['symbol'] == symbol].copy()

        # Resample price data; .ohlc() yields open/high/low/close columns
        # (the dict-renaming form of Series.resample().agg() was removed in pandas 1.0)
        ohlcv = symbol_df['price'].resample(interval).ohlc()

        # Resample volume and trade count
        volume = symbol_df['quantity'].resample(interval).sum()
        trade_count = symbol_df.resample(interval).size()

        # Combine data
        for timestamp, row in ohlcv.iterrows():
            if pd.notna(row['open']):  # Skip empty periods
                ohlcv_data.append({
                    'time': timestamp,
                    'symbol': symbol,
                    'exchange': symbol_df['exchange'].iloc[0] if 'exchange' in symbol_df.columns else 'binance',
                    'interval': interval,
                    'open_price': Decimal(str(row['open'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP),
                    'high_price': Decimal(str(row['high'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP),
                    'low_price': Decimal(str(row['low'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP),
                    'close_price': Decimal(str(row['close'])).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP),
                    'volume': Decimal(str(volume.loc[timestamp])) if timestamp in volume.index else Decimal('0'),
                    'quote_volume': None,
                    'trade_count': int(trade_count.loc[timestamp]) if timestamp in trade_count.index else 0
                })

    return ohlcv_data
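
# Usage sketch (illustrative ticks; interval is any pandas offset alias):
#
#   ticks = [
#       {'time': '2024-01-01T00:00:01Z', 'symbol': 'BTCUSDT', 'price': '42000.0', 'quantity': '0.01'},
#       {'time': '2024-01-01T00:00:45Z', 'symbol': 'BTCUSDT', 'price': '42010.5', 'quantity': '0.02'},
#   ]
#   resample_ticks_to_ohlcv(ticks, '1min')   # -> one candle: open 42000.0, close 42010.5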


def validate_ohlcv_data(ohlcv: Dict[str, Any]) -> bool:
    """Validate OHLCV data integrity"""
    try:
        # Check required fields
        required_fields = ['time', 'symbol', 'open_price', 'high_price', 'low_price', 'close_price', 'volume']
        for field in required_fields:
            if field not in ohlcv:
                return False

        # Check price relationships
        high = float(ohlcv['high_price'])
        low = float(ohlcv['low_price'])
        open_price = float(ohlcv['open_price'])
        close = float(ohlcv['close_price'])

        # High should be >= all other prices
        if high < max(low, open_price, close):
            return False

        # Low should be <= all other prices
        if low > min(high, open_price, close):
            return False

        # All prices should be positive
        if any(price <= 0 for price in [high, low, open_price, close]):
            return False

        # Volume should be non-negative
        if float(ohlcv['volume']) < 0:
            return False

        return True

    except (ValueError, TypeError, KeyError):
        return False


def calculate_price_change(current_price: float, previous_price: float) -> Dict[str, float]:
    """Calculate absolute and percentage price change"""
    if previous_price == 0:
        return {'change': 0.0, 'change_percent': 0.0}

    change = current_price - previous_price
    change_percent = (change / previous_price) * 100

    return {
        'change': round(change, 8),
        'change_percent': round(change_percent, 4)
    }


def format_volume(volume: Union[int, float, Decimal]) -> str:
    """Format volume for display"""
    volume = float(volume)

    if volume >= 1e9:
        return f"{volume / 1e9:.2f}B"
    elif volume >= 1e6:
        return f"{volume / 1e6:.2f}M"
    elif volume >= 1e3:
        return f"{volume / 1e3:.2f}K"
    else:
        return f"{volume:.2f}"


def get_interval_seconds(interval: str) -> int:
    """Convert an interval string to seconds"""
    interval_map = {
        '1s': 1,
        '1m': 60,
        '3m': 180,
        '5m': 300,
        '15m': 900,
        '30m': 1800,
        '1h': 3600,
        '2h': 7200,
        '4h': 14400,
        '6h': 21600,
        '8h': 28800,
        '12h': 43200,
        '1d': 86400,
        '3d': 259200,
        '1w': 604800,
        '1M': 2592000  # Approximate (30 days)
    }

    return interval_map.get(interval, 60)  # Default to 1 minute
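
# Illustrative results:
#
#   get_interval_seconds('5m')   # -> 300
#   get_interval_seconds('1d')   # -> 86400
#   get_interval_seconds('2w')   # unknown key -> falls back to 60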


def safe_decimal_conversion(value: Any) -> Optional[Decimal]:
    """Safely convert a value to a Decimal with eight decimal places"""
    try:
        if value is None or value == '':
            return None
        return Decimal(str(value)).quantize(Decimal('0.00000001'), rounding=ROUND_HALF_UP)
    except (ValueError, TypeError, DecimalException):
        return None
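
# Illustrative results (eight decimal places, half-up rounding):
#
#   safe_decimal_conversion('123.456789015')  # -> Decimal('123.45678902')
#   safe_decimal_conversion(None)             # -> None
#   safe_decimal_conversion('not-a-number')   # -> None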


def batch_data(data: List[Any], batch_size: int) -> List[List[Any]]:
    """Split data into batches"""
    batches = []
    for i in range(0, len(data), batch_size):
        batches.append(data[i:i + batch_size])
    return batches
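
# Illustrative result:
#
#   batch_data([1, 2, 3, 4, 5], batch_size=2)  # -> [[1, 2], [3, 4], [5]]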


def get_binance_symbol_info(symbol: str) -> Dict[str, Any]:
    """Get symbol information for validation"""
    # This is a simplified, hard-coded lookup - in production you might want to
    # fetch the live symbol list from the Binance API instead
    common_symbols = {
        'BTCUSDT': {'baseAsset': 'BTC', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'ETHUSDT': {'baseAsset': 'ETH', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'BNBUSDT': {'baseAsset': 'BNB', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'XRPUSDT': {'baseAsset': 'XRP', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'SOLUSDT': {'baseAsset': 'SOL', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'ADAUSDT': {'baseAsset': 'ADA', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'DOTUSDT': {'baseAsset': 'DOT', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'LINKUSDT': {'baseAsset': 'LINK', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'LTCUSDT': {'baseAsset': 'LTC', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'HBARUSDT': {'baseAsset': 'HBAR', 'quoteAsset': 'USDT', 'status': 'TRADING'},
        'HBARBTC': {'baseAsset': 'HBAR', 'quoteAsset': 'BTC', 'status': 'TRADING'}
    }

    return common_symbols.get(symbol, {'status': 'UNKNOWN'})


class DataValidator:
    """Class for validating trading data"""

    @staticmethod
    def validate_tick_data(tick: Dict[str, Any]) -> bool:
        """Validate tick/trade data"""
        try:
            required_fields = ['time', 'symbol', 'price', 'quantity', 'trade_id']
            for field in required_fields:
                if field not in tick:
                    return False

            # Validate data types and ranges
            if float(tick['price']) <= 0:
                return False

            if float(tick['quantity']) <= 0:
                return False

            if not isinstance(tick['trade_id'], (int, str)):
                return False

            if not validate_symbol(tick['symbol']):
                return False

            return True

        except (ValueError, TypeError):
            return False

    @staticmethod
    def validate_indicators_data(indicators: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Validate and clean indicators data"""
        valid_indicators = []

        for indicator in indicators:
            try:
                if ('time' in indicator and
                        'indicator_name' in indicator and
                        'indicator_value' in indicator):

                    # Check for a valid, finite numeric value
                    value = float(indicator['indicator_value'])
                    if not (np.isnan(value) or np.isinf(value)):
                        valid_indicators.append(indicator)

            except (ValueError, TypeError):
                continue

        return valid_indicators
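
# Usage sketch (illustrative tick; trade_id may be an int or a string):
#
#   DataValidator.validate_tick_data({
#       'time': 1700000000000, 'symbol': 'BTCUSDT',
#       'price': '43250.10', 'quantity': '0.015', 'trade_id': 42,
#   })  # -> True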


def create_error_response(error_message: str, error_code: str = "GENERAL_ERROR") -> Dict[str, Any]:
    """Create standardized error response"""
    return {
        "success": False,
        "error": {
            "code": error_code,
            "message": error_message,
            # timezone-aware; datetime.utcnow() is deprecated
            "timestamp": datetime.now(timezone.utc).isoformat()
        }
    }


def create_success_response(data: Any = None, message: str = "Success") -> Dict[str, Any]:
    """Create standardized success response"""
    response = {
        "success": True,
        "message": message,
        "timestamp": datetime.now(timezone.utc).isoformat()
    }

    if data is not None:
        response["data"] = data

    return response


class PerformanceTimer:
    """Context manager for timing operations"""

    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time = None
        self.logger = logging.getLogger(__name__)

    def __enter__(self):
        # Use timezone-aware timestamps; datetime.utcnow() is deprecated
        self.start_time = datetime.now(timezone.utc)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.start_time:
            duration = (datetime.now(timezone.utc) - self.start_time).total_seconds()

            # Log slow operations
            slow_threshold = float(os.getenv('SLOW_QUERY_THRESHOLD_MS', 1000)) / 1000

            if duration > slow_threshold:
                self.logger.warning(f"SLOW OPERATION: {self.operation_name} took {duration:.3f}s")
            else:
                self.logger.debug(f"{self.operation_name} completed in {duration:.3f}s")


# Export main functions
__all__ = [
    'setup_logging', 'load_config', 'save_config', 'validate_config',
    'create_default_config', 'validate_symbol', 'format_timestamp',
    'parse_kline_data', 'parse_trade_data', 'calculate_technical_indicators',
    'resample_ticks_to_ohlcv', 'validate_ohlcv_data', 'calculate_price_change',
    'format_volume', 'get_interval_seconds', 'safe_decimal_conversion',
    'batch_data', 'get_binance_symbol_info', 'DataValidator',
    'create_error_response', 'create_success_response', 'PerformanceTimer',
    'reload_env_vars'
]