You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
8.1 KiB
154 lines
8.1 KiB
# -*- coding: utf-8 -*-
|
|
import os
|
|
from collections import defaultdict
|
|
from constant import schneider_columns
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
alter_dbr_dir = '.\\alter_data\\'
|
|
data_dir = '.\\data\\'
|
|
pivot_data_dir = '.\\pivot\\'
|
|
|
|
|
|
def check_path_creat(path):
|
|
"""
|
|
检查路径是否存在,不存在就创建
|
|
:param path: 文件路径
|
|
:return:
|
|
"""
|
|
if not os.path.exists(path):
|
|
os.mkdir(path)
|
|
|
|
def discount_processing(filename:str, **kwargs):
|
|
check_path_creat(os.path.join(alter_dbr_dir, filename[:-4]))
|
|
filepath = os.path.join(data_dir, filename)
|
|
altername = '改_{}'.format(filename)
|
|
alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
|
|
discount_3_li = kwargs.get('discount_3_li', [])
|
|
discount_8_li = kwargs.get('discount_8_li', [])
|
|
es_iid = kwargs.get('es_iid')
|
|
if not es_iid:
|
|
raise ValueError('请输入esInvoiceID')
|
|
es_total = 0
|
|
discount_dict = defaultdict(float)
|
|
spp_id = []
|
|
spp_total = 0
|
|
with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'w', encoding='utf-8') as w:
|
|
while True:
|
|
line = r.readline()
|
|
if not line:
|
|
break
|
|
data = line[1:-2].split('","')
|
|
line_dict = dict(zip(list(schneider_columns), data))
|
|
if line_dict['InvoiceID']in spp_id:
|
|
continue
|
|
if line_dict['ItemDescription'].startswith('AWS Solution Provider Program Discount'):
|
|
spp_id.append(line_dict['InvoiceID'])
|
|
spp_total += float(line_dict['UnBlendedCost'])
|
|
continue
|
|
if line_dict['InvoiceID'] in discount_8_li:
|
|
if not line_dict['RecordType'] == 'InvoiceTotal':
|
|
discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 8 * 2
|
|
if not line_dict['ItemDescription'] == '税金 VAT 类型':
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
|
|
else:
|
|
line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
|
|
line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
|
|
elif line_dict['InvoiceID'] in discount_3_li:
|
|
if not line_dict['RecordType'] == 'InvoiceTotal':
|
|
discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 3 * 2
|
|
if not line_dict['ItemDescription'] == '税金 VAT 类型':
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 3)
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 3)
|
|
else:
|
|
line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 3)
|
|
line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 3)
|
|
elif line_dict['InvoiceID'] == es_iid and not line_dict['RecordType'] == 'InvoiceTotal':
|
|
line_dict['LinkedAccountId'] = '056625042987'
|
|
es_total += float(line_dict['UnBlendedCost'])
|
|
elif line_dict['RecordType'] == 'AccountTotal':
|
|
if line_dict['LinkedAccountId'] in discount_dict.keys():
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
|
|
- float(discount_dict[line_dict['LinkedAccountId']]))
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
|
|
- float(discount_dict[line_dict['LinkedAccountId']]))
|
|
if line_dict['LinkedAccountId'] == '056625042987':
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) + es_total)
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) + es_total)
|
|
elif line_dict['LinkedAccountId'] == '028328046529':
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - es_total - (spp_total * 1.06))
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - es_total - (spp_total * 1.06))
|
|
elif line_dict['RecordType'] == 'StatementTotal':
|
|
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
|
|
- sum(list(discount_dict.values())) - (spp_total * 1.06))
|
|
print('total:{}'.format(float(line_dict['UnBlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06)))
|
|
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
|
|
- sum(list(discount_dict.values())) - (spp_total * 1.06))
|
|
w.write('"' + '","'.join(list(line_dict.values())) + '"' + '\n')
|
|
print('es_total:{}'.format(es_total))
|
|
print('spp_total:{}'.format(spp_total))
|
|
print('{}折扣处理完成'.format(filename))
|
|
|
|
def get_invoiceid(filename):
|
|
filepath = os.path.join(data_dir, filename)
|
|
reader = pd.read_csv(
|
|
filepath,
|
|
iterator=True,
|
|
sep=',',
|
|
chunksize=10000,
|
|
header=None,
|
|
names=schneider_columns,
|
|
dtype={
|
|
'LinkedAccountId': np.object,
|
|
'InvoiceID': np.object
|
|
},
|
|
low_memory=False)
|
|
for result in reader:
|
|
data = result[result['ItemDescription'].str.contains(
|
|
'Enterprise Support for month of')]
|
|
if not data.empty:
|
|
print('InvoiceID:{}'.format(data.iloc[0]['InvoiceID']))
|
|
return data.iloc[0]['InvoiceID']
|
|
raise ValueError('未找到ES的 InvoiceID 请检查文件')
|
|
|
|
|
|
def more_line_aggregation(filename):
|
|
params = ['RecordType', 'LinkedAccountId', 'ProductName']
|
|
lines = ['UnBlendedCost']
|
|
altername = '改_{}'.format(filename)
|
|
alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
|
|
reader = pd.read_csv(alterpath, iterator=True, sep=',', chunksize=10000, low_memory=False)
|
|
first_line = reader.get_chunk(1)
|
|
if len(first_line.columns) == 1:
|
|
reader = pd.read_csv(alterpath, iterator=True, sep=',', skiprows=[0, ], chunksize=10000, low_memory=False)
|
|
final_df = pd.DataFrame()
|
|
print('{}开始分析'.format(filename))
|
|
for data in reader:
|
|
for line_name in lines:
|
|
data[line_name] = data[line_name].apply(pd.to_numeric, errors='coerce').fillna(0.0)
|
|
for param in params:
|
|
data[param] = data[param].fillna('')
|
|
chunk_df = data.groupby(by=params).agg({i: 'sum' for i in lines})
|
|
final_df = pd.concat([final_df, chunk_df], axis=0)
|
|
final_df = final_df.groupby(by=params).agg({i: 'sum' for i in lines})
|
|
for line in lines:
|
|
final_df[line] = final_df[line].apply(lambda x: '{:.6f}'.format(x))
|
|
dir, file = os.path.split(alterpath)
|
|
filepath = os.path.join(pivot_data_dir, file)
|
|
final_df.to_csv(filepath, mode='w', encoding='UTF-8', quoting=1)
|
|
print('{}分析完成'.format(filepath))
|
|
|
|
if __name__ == '__main__':
|
|
# 文件名称 北京,宁夏分开出
|
|
filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-04.csv'
|
|
# filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-04.csv'
|
|
# 折扣为3%的InvoiceID列表
|
|
discount_3_li = ['1342131171', '1342131207', '1342131623', '1342131195', '1342131627', '1342131651', '1342131187',
|
|
'1342131639', '1342131199', '1342131631', '1342131643', '1342131235', '1342131659', '1342131219',
|
|
'1342131215', '1342131231']
|
|
# 折扣为8%的InvoiceID列表
|
|
discount_8_li = ['1342131247', '1342131655', '1342131663', '1342131667']
|
|
es_iid = get_invoiceid(filename)
|
|
discount_processing(filename, discount_3_li=discount_3_li, discount_8_li=discount_8_li, es_iid=es_iid)
|
|
more_line_aggregation(filename) |