"""检查数据准确性"""
import pandas as pd
import sys
# Switch stdout to UTF-8 before anything is printed; the messages below
# contain Chinese text and the yen sign (\u00a5).
sys.stdout.reconfigure(encoding='utf-8')

# Load the sales table that every check in this script reads from.
df = pd.read_parquet(
    r'C:\Users\Administrator\.openclaw\workspace\letaro\data_model\sales.parquet'
)

banner = "=" * 50
print(banner)
print("数据检查")
print(banner)

# 1. Basic statistics: row count, sum of `subtotal`, and how many rows have
#    a negative `subtotal`.
print(f"总记录数: {len(df)}")
subtotal_sum = df['subtotal'].sum()
print(f"总subtotal: \u00a5{subtotal_sum:,.2f}")
negative_rows = df.loc[df['subtotal'] < 0]
print(f"负值subtotal: {len(negative_rows)}")
print()

# 2. Distribution of the is_refund flag.
print("is_refund列统计:")
refund_flag_counts = df['is_refund'].value_counts()
print(refund_flag_counts)
print()

# 3. Rows NOT flagged as refunds whose subtotal is nevertheless negative.
not_flagged_negative = df['is_refund'].eq(False) & df['subtotal'].lt(0)
nf = df.loc[not_flagged_negative]
print(f"没有退货标记但subtotal为负: {len(nf)}")
if not nf.empty:
    # Show up to ten offending rows with the columns needed to trace them.
    preview_columns = [
        'receipt_no',
        'product_name',
        'quantity',
        'subtotal',
        'sale_time',
        'store_name',
    ]
    print(nf[preview_columns].head(10))
print()

# 4. Rows flagged as refunds whose subtotal is positive.
flagged_positive = df['is_refund'].eq(True) & df['subtotal'].gt(0)
pf = df.loc[flagged_positive]
print(f"退货标记但subtotal为正: {len(pf)}")

# 5. Compare the two amount columns.
#    Per the original author's note: total = amount before discount,
#    subtotal = amount after discount.
print("\n--- 合计 vs 小计 ---")
for amount_column in ('total', 'subtotal'):
    print(f"{amount_column}总和: \u00a5{df[amount_column].sum():,.2f}")

# 6. Actual sales: the sum of the positive subtotals (per the original note,
#    the money actually received).
sales = df[df['subtotal'].gt(0)]
sales_amount = sales['subtotal'].sum()
print(f"\n真正销售额（subtotal>0）: \u00a5{sales_amount:,.2f}")
print(f"行数: {len(sales)}")

# Actual refunds: the sum of the negative subtotals.
refunds = df[df['subtotal'].lt(0)]
refund_amount = refunds['subtotal'].sum()
print(f"真正退款（subtotal<0）: \u00a5{refund_amount:,.2f}")
print(f"行数: {len(refunds)}")

# 7. Daily trend of actual sales (the rows with subtotal > 0 held in `sales`).
print("\n每天的实际销售额:")

# `sales` is a boolean-filtered slice of `df`; writing a new column into it in
# place makes pandas emit SettingWithCopyWarning. `assign` returns a new frame
# carrying the extra `date` column (YYYY-MM-DD string), so no warning is
# raised and `df` is never touched.
sales = sales.assign(date=sales['sale_time'].dt.strftime('%Y-%m-%d'))

daily = sales.groupby('date').agg(
    销售额=('subtotal', 'sum'),
    # NOTE(review): 'count' tallies non-null receipt_no rows, not distinct
    # receipts. If one receipt can span several rows, 'nunique' may be the
    # intended order count -- confirm against the data model.
    单数=('receipt_no', 'count'),
    件数=('quantity', 'sum'),
).sort_index()

# One line per day: sales amount and the per-day receipt_no count.
for d, amount, orders in zip(daily.index, daily['销售额'], daily['单数']):
    print(f"  {d}: \u00a5{float(amount):,.0f}  单数={int(orders)}")

# Grand total over all days; replaces the manual running sum in the loop.
total = float(daily['销售额'].sum())
print(f"\n汇总: \u00a5{total:,.0f}")

# 8. Look again at why quick_report comes out higher: break the rows down by
#    refund flag and by the sign of subtotal.
print("\n=== is_refund标记分析 ===")
is_flagged = df['is_refund'] == True
not_flagged = df['is_refund'] == False
amount = df['subtotal']

# (label, row mask) pairs, printed in this order.
breakdown = [
    ("is_refund=True且subtotal>0", is_flagged & (amount > 0)),
    ("is_refund=True且subtotal<=0", is_flagged & (amount <= 0)),
    ("is_refund=False且subtotal<0", not_flagged & (amount < 0)),
    ("is_refund=False且subtotal=0", not_flagged & (amount == 0)),
    ("is_refund=False且subtotal>0", not_flagged & (amount > 0)),
]
for label, mask in breakdown:
    print(f"{label}: {len(df[mask])}")
