# -*- coding: utf-8 -*- import os from collections import defaultdict from constant import schneider_columns import pandas as pd import numpy as np alter_dbr_dir = '.\\alter_data\\' data_dir = '.\\data\\' pivot_data_dir = '.\\pivot\\' def check_path_creat(path): """ 检查路径是否存在,不存在就创建 :param path: 文件路径 :return: """ if not os.path.exists(path): os.mkdir(path) def discount_processing(filename:str, **kwargs): check_path_creat(os.path.join(alter_dbr_dir, filename[:-4])) filepath = os.path.join(data_dir, filename) altername = '改_{}'.format(filename) alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername) discount_3_li = kwargs.get('discount_3_li', []) discount_8_li = kwargs.get('discount_8_li', []) es_iid = kwargs.get('es_iid') if not es_iid: raise ValueError('请输入esInvoiceID') es_total = 0 discount_dict = defaultdict(float) spp_id = [] spp_total = 0 with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'w', encoding='utf-8') as w: while True: line = r.readline() if not line: break data = line[1:-2].split('","') line_dict = dict(zip(list(schneider_columns), data)) if line_dict['InvoiceID']in spp_id: continue if line_dict['ItemDescription'].startswith('AWS Solution Provider Program Discount'): spp_id.append(line_dict['InvoiceID']) spp_total += float(line_dict['UnBlendedCost']) continue if line_dict['InvoiceID'] in discount_8_li: if not line_dict['RecordType'] == 'InvoiceTotal': discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 8 * 2 if not line_dict['ItemDescription'] == '税金 VAT 类型': line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 8 * 6) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6) else: line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 8 * 6) line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6) elif line_dict['InvoiceID'] in discount_3_li: if not line_dict['RecordType'] == 'InvoiceTotal': discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 3 * 2 if not line_dict['ItemDescription'] == '税金 VAT 类型': line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 3) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 3) else: line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 3) line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 3) elif line_dict['InvoiceID'] == es_iid and not line_dict['RecordType'] == 'InvoiceTotal': line_dict['LinkedAccountId'] = '056625042987' es_total += float(line_dict['UnBlendedCost']) elif line_dict['RecordType'] == 'AccountTotal': if line_dict['LinkedAccountId'] in discount_dict.keys(): line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - float(discount_dict[line_dict['LinkedAccountId']])) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - float(discount_dict[line_dict['LinkedAccountId']])) if line_dict['LinkedAccountId'] == '056625042987': line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) + es_total) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) + es_total) elif line_dict['LinkedAccountId'] == '028328046529': line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - es_total - (spp_total * 1.06)) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - es_total - (spp_total * 1.06)) elif line_dict['RecordType'] == 'StatementTotal': line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06)) print('total:{}'.format(float(line_dict['UnBlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06))) line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06)) w.write('"' + '","'.join(list(line_dict.values())) + '"' + '\n') print('es_total:{}'.format(es_total)) print('spp_total:{}'.format(spp_total)) print('{}折扣处理完成'.format(filename)) def get_invoiceid(filename): filepath = os.path.join(data_dir, filename) reader = pd.read_csv( filepath, iterator=True, sep=',', chunksize=10000, header=None, names=schneider_columns, dtype={ 'LinkedAccountId': np.object, 'InvoiceID': np.object }, low_memory=False) for result in reader: data = result[result['ItemDescription'].str.contains( 'Enterprise Support for month of')] if not data.empty: print('InvoiceID:{}'.format(data.iloc[0]['InvoiceID'])) return data.iloc[0]['InvoiceID'] raise ValueError('未找到ES的 InvoiceID 请检查文件') def more_line_aggregation(filename): params = ['RecordType', 'LinkedAccountId', 'ProductName'] lines = ['UnBlendedCost'] altername = '改_{}'.format(filename) alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername) reader = pd.read_csv(alterpath, iterator=True, sep=',', chunksize=10000, low_memory=False) first_line = reader.get_chunk(1) if len(first_line.columns) == 1: reader = pd.read_csv(alterpath, iterator=True, sep=',', skiprows=[0, ], chunksize=10000, low_memory=False) final_df = pd.DataFrame() print('{}开始分析'.format(filename)) for data in reader: for line_name in lines: data[line_name] = data[line_name].apply(pd.to_numeric, errors='coerce').fillna(0.0) for param in params: data[param] = data[param].fillna('') chunk_df = data.groupby(by=params).agg({i: 'sum' for i in lines}) final_df = pd.concat([final_df, chunk_df], axis=0) final_df = final_df.groupby(by=params).agg({i: 'sum' for i in lines}) for line in lines: final_df[line] = final_df[line].apply(lambda x: '{:.6f}'.format(x)) dir, file = os.path.split(alterpath) filepath = os.path.join(pivot_data_dir, file) final_df.to_csv(filepath, mode='w', encoding='UTF-8', quoting=1) print('{}分析完成'.format(filepath)) if __name__ == '__main__': # 文件名称 北京,宁夏分开出 filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-04.csv' # filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-04.csv' # 折扣为3%的InvoiceID列表 discount_3_li = ['1342131171', '1342131207', '1342131623', '1342131195', '1342131627', '1342131651', '1342131187', '1342131639', '1342131199', '1342131631', '1342131643', '1342131235', '1342131659', '1342131219', '1342131215', '1342131231'] # 折扣为8%的InvoiceID列表 discount_8_li = ['1342131247', '1342131655', '1342131663', '1342131667'] es_iid = get_invoiceid(filename) discount_processing(filename, discount_3_li=discount_3_li, discount_8_li=discount_8_li, es_iid=es_iid) more_line_aggregation(filename)