You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
8.1 KiB

# -*- coding: utf-8 -*-
import os
from collections import defaultdict
from constant import schneider_columns
import pandas as pd
import numpy as np
alter_dbr_dir = '.\\alter_data\\'
data_dir = '.\\data\\'
pivot_data_dir = '.\\pivot\\'
def check_path_creat(path):
"""
检查路径是否存在,不存在就创建
:param path: 文件路径
:return:
"""
if not os.path.exists(path):
os.mkdir(path)
def discount_processing(filename:str, **kwargs):
check_path_creat(os.path.join(alter_dbr_dir, filename[:-4]))
filepath = os.path.join(data_dir, filename)
altername = '改_{}'.format(filename)
alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
discount_3_li = kwargs.get('discount_3_li', [])
discount_8_li = kwargs.get('discount_8_li', [])
es_iid = kwargs.get('es_iid')
if not es_iid:
raise ValueError('请输入esInvoiceID')
es_total = 0
discount_dict = defaultdict(float)
spp_id = []
spp_total = 0
with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'w', encoding='utf-8') as w:
while True:
line = r.readline()
if not line:
break
data = line[1:-2].split('","')
line_dict = dict(zip(list(schneider_columns), data))
if line_dict['InvoiceID']in spp_id:
continue
if line_dict['ItemDescription'].startswith('AWS Solution Provider Program Discount'):
spp_id.append(line_dict['InvoiceID'])
spp_total += float(line_dict['UnBlendedCost'])
continue
if line_dict['InvoiceID'] in discount_8_li:
if not line_dict['RecordType'] == 'InvoiceTotal':
discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 8 * 2
if not line_dict['ItemDescription'] == '税金 VAT 类型':
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
else:
line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 8 * 6)
line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 8 * 6)
elif line_dict['InvoiceID'] in discount_3_li:
if not line_dict['RecordType'] == 'InvoiceTotal':
discount_dict[line_dict['LinkedAccountId']] += float(line_dict['BlendedCost']) / 3 * 2
if not line_dict['ItemDescription'] == '税金 VAT 类型':
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) / 3)
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) / 3)
else:
line_dict['BlendedCost'] = '{:.2f}'.format(float(line_dict['BlendedCost']) / 3)
line_dict['UnBlendedCost'] = '{:.2f}'.format(float(line_dict['UnBlendedCost']) / 3)
elif line_dict['InvoiceID'] == es_iid and not line_dict['RecordType'] == 'InvoiceTotal':
line_dict['LinkedAccountId'] = '056625042987'
es_total += float(line_dict['UnBlendedCost'])
elif line_dict['RecordType'] == 'AccountTotal':
if line_dict['LinkedAccountId'] in discount_dict.keys():
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
- float(discount_dict[line_dict['LinkedAccountId']]))
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
- float(discount_dict[line_dict['LinkedAccountId']]))
if line_dict['LinkedAccountId'] == '056625042987':
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) + es_total)
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) + es_total)
elif line_dict['LinkedAccountId'] == '028328046529':
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost']) - es_total - (spp_total * 1.06))
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost']) - es_total - (spp_total * 1.06))
elif line_dict['RecordType'] == 'StatementTotal':
line_dict['BlendedCost'] = '{:.6f}'.format(float(line_dict['BlendedCost'])
- sum(list(discount_dict.values())) - (spp_total * 1.06))
print('total:{}'.format(float(line_dict['UnBlendedCost']) - sum(list(discount_dict.values())) - (spp_total * 1.06)))
line_dict['UnBlendedCost'] = '{:.6f}'.format(float(line_dict['UnBlendedCost'])
- sum(list(discount_dict.values())) - (spp_total * 1.06))
w.write('"' + '","'.join(list(line_dict.values())) + '"' + '\n')
print('es_total:{}'.format(es_total))
print('spp_total:{}'.format(spp_total))
print('{}折扣处理完成'.format(filename))
def get_invoiceid(filename):
filepath = os.path.join(data_dir, filename)
reader = pd.read_csv(
filepath,
iterator=True,
sep=',',
chunksize=10000,
header=None,
names=schneider_columns,
dtype={
'LinkedAccountId': np.object,
'InvoiceID': np.object
},
low_memory=False)
for result in reader:
data = result[result['ItemDescription'].str.contains(
'Enterprise Support for month of')]
if not data.empty:
print('InvoiceID:{}'.format(data.iloc[0]['InvoiceID']))
return data.iloc[0]['InvoiceID']
raise ValueError('未找到ES的 InvoiceID 请检查文件')
def more_line_aggregation(filename):
params = ['RecordType', 'LinkedAccountId', 'ProductName']
lines = ['UnBlendedCost']
altername = '改_{}'.format(filename)
alterpath = os.path.join(alter_dbr_dir, filename[:-4], altername)
reader = pd.read_csv(alterpath, iterator=True, sep=',', chunksize=10000, low_memory=False)
first_line = reader.get_chunk(1)
if len(first_line.columns) == 1:
reader = pd.read_csv(alterpath, iterator=True, sep=',', skiprows=[0, ], chunksize=10000, low_memory=False)
final_df = pd.DataFrame()
print('{}开始分析'.format(filename))
for data in reader:
for line_name in lines:
data[line_name] = data[line_name].apply(pd.to_numeric, errors='coerce').fillna(0.0)
for param in params:
data[param] = data[param].fillna('')
chunk_df = data.groupby(by=params).agg({i: 'sum' for i in lines})
final_df = pd.concat([final_df, chunk_df], axis=0)
final_df = final_df.groupby(by=params).agg({i: 'sum' for i in lines})
for line in lines:
final_df[line] = final_df[line].apply(lambda x: '{:.6f}'.format(x))
dir, file = os.path.split(alterpath)
filepath = os.path.join(pivot_data_dir, file)
final_df.to_csv(filepath, mode='w', encoding='UTF-8', quoting=1)
print('{}分析完成'.format(filepath))
if __name__ == '__main__':
# 文件名称 北京,宁夏分开出
filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-2022-04.csv'
# filename = '028328046529-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-2022-04.csv'
# 折扣为3%的InvoiceID列表
discount_3_li = ['1342131171', '1342131207', '1342131623', '1342131195', '1342131627', '1342131651', '1342131187',
'1342131639', '1342131199', '1342131631', '1342131643', '1342131235', '1342131659', '1342131219',
'1342131215', '1342131231']
# 折扣为8%的InvoiceID列表
discount_8_li = ['1342131247', '1342131655', '1342131663', '1342131667']
es_iid = get_invoiceid(filename)
discount_processing(filename, discount_3_li=discount_3_li, discount_8_li=discount_8_li, es_iid=es_iid)
more_line_aggregation(filename)