zhangdanjiaoben/大客户账单/施耐德/施耐德.py

# -*- coding: utf-8 -*-
import os
from collections import defaultdict
from constant import schneider_columns
import pandas as pd
import numpy as np

alter_dbr_dir = '.\\alter_data\\'
data_dir = '.\\data\\'
pivot_data_dir = '.\\pivot\\'


def check_path_creat(path):
    """
    检查路径是否存在，不存在就创建
    :param path: 文件路径
    :return:
    """
    if not os.path.exists(path):
        os.mkdir(path)

def discount_processing(filename:str, **kwargs):
    check_path_creat(os.path.join(alter_dbr_dir, filename[:-4]))
    filepath = os.path.join(data_dir, filename)
    altername = '改_{}'.format(filename)
    alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
    discount_3_li = kwargs.get('discount_3_li', [])
    discount_8_li = kwargs.get('discount_8_li', [])
    es_iid = kwargs.get('es_iid')
    if not es_iid:
        raise ValueError('请输入esInvoiceID')
    es_total = 0
    discount_dict = defaultdict(float)
    spp_id = []
    spp_total = 0
    with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'w', encoding='utf-8') as w:
        while True:
            line = r.readline()
            if not line:
                break
            data = line[1:-2].split('","')
            line_dict = dict(zip(list(schneider_columns), data))
            if line_dict['InvoiceID']in spp_id:
                continue
            if line_dict['ItemDescription'].startswith('AWS Solution Provider Program Discount'):
                spp_id.append(line_dict['InvoiceID'])
                spp_total += float(line_dict['UnBlendedCost'])
                continue
            if line_dict['InvoiceID'] in discount_8_li:
                if not line_dict['RecordType'] == 'InvoiceTotal':
                    discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 8 * 2
                if not line_dict['ItemDescription'] == '税金 VAT 类型':
                    line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
                    line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
                else:
                    line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
                    line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
            elif line_dict['InvoiceID'] in discount_3_li:
                if not line_dict['RecordType'] == 'InvoiceTotal':
                    discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 3 * 2
                if not line_dict['ItemDescription'] == '税金 VAT 类型':
                    line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 3)
                    line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 3)
                else:
                    line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 3)
                    line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 3)
            elif line_dict['InvoiceID'] == es_iid and not line_dict['RecordType'] == 'InvoiceTotal':
                line_dict['LinkedAccountId'] = '056625042987'
                es_total += float(line_dict['UnBlendedCost'])
            elif line_dict['RecordType'] == 'AccountTotal':
                if line_dict['LinkedAccountId'] in discount_dict.keys():
                    line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
                                                               - float(discount_dict[line_dict['LinkedAccountId']]))
                    line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
                                                               - float(discount_dict[line_dict['LinkedAccountId']]))
                if line_dict['LinkedAccountId'] == '056625042987':
                    line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) + es_total)
                    line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) + es_total)
                elif line_dict['LinkedAccountId'] == '028328046529':
                    line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - es_total - (spp_total * 1.06))
                    line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - es_total - (spp_total * 1.06))
            elif line_dict['RecordType'] == 'StatementTotal':
                line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
                                                           - sum(list(discount_dict.values())) - (spp_total * 1.06))
                print('total:{}'.format(float(line_dict['UnBlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06)))
                line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
                                                             - sum(list(discount_dict.values())) - (spp_total * 1.06))
            w.write('"' + '","'.join(list(line_dict.values())) + '"' + '\n')
        print('es_total:{}'.format(es_total))
        print('spp_total:{}'.format(spp_total))
    print('{}折扣处理完成'.format(filename))

def get_invoiceid(filename):
    filepath = os.path.join(data_dir, filename)
    reader = pd.read_csv(
        filepath,
        iterator=True,
        sep=',',
        chunksize=10000,
        header=None,
        names=schneider_columns,
        dtype={
            'LinkedAccountId': np.object,
            'InvoiceID': np.object
        },
        low_memory=False)
    for result in reader:
        data = result[result['ItemDescription'].str.contains(
            'Enterprise Support for month of')]
        if not data.empty:
            print('InvoiceID:{}'.format(data.iloc[0]['InvoiceID']))
            return data.iloc[0]['InvoiceID']
    raise ValueError('未找到ES的 InvoiceID 请检查文件')


def more_line_aggregation(filename):
    params = ['RecordType', 'LinkedAccountId', 'ProductName']
    lines = ['UnBlendedCost']
    altername = '改_{}'.format(filename)
    alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
    reader = pd.read_csv(alterpath, iterator=True, sep=',', chunksize=10000, low_memory=False)
    first_line = reader.get_chunk(1)
    if len(first_line.columns) == 1:
        reader = pd.read_csv(alterpath, iterator=True, sep=',', skiprows=[0, ], chunksize=10000, low_memory=False)
    final_df = pd.DataFrame()
    print('{}开始分析'.format(filename))
    for data in reader:
        for line_name in lines:
            data[line_name] = data[line_name].apply(pd.to_numeric, errors='coerce').fillna(0.0)
        for param in params:
            data[param] = data[param].fillna('')
        chunk_df = data.groupby(by=params).agg({i: 'sum' for i in lines})
        final_df = pd.concat([final_df, chunk_df], axis=0)
        final_df = final_df.groupby(by=params).agg({i: 'sum' for i in lines})
    for line in lines:
        final_df[line] = final_df[line].apply(lambda x: '{:.6f}'.format(x))
    dir, file = os.path.split(alterpath)
    filepath = os.path.join(pivot_data_dir, file)
    final_df.to_csv(filepath, mode='w', encoding='UTF-8', quoting=1)
    print('{}分析完成'.format(filepath))

if __name__ == '__main__':
    # 文件名称 北京，宁夏分开出
    filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-04.csv'
    # filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-04.csv'
    # 折扣为3%的InvoiceID列表
    discount_3_li = ['1342131171', '1342131207', '1342131623', '1342131195', '1342131627', '1342131651', '1342131187',
                     '1342131639', '1342131199', '1342131631', '1342131643', '1342131235', '1342131659', '1342131219',
                     '1342131215', '1342131231']
    # 折扣为8%的InvoiceID列表
    discount_8_li = ['1342131247', '1342131655', '1342131663', '1342131667']
    es_iid = get_invoiceid(filename)
    discount_processing(filename, discount_3_li=discount_3_li, discount_8_li=discount_8_li, es_iid=es_iid)
    more_line_aggregation(filename)