2020 Big Contest - 3 · Data Flows in You

2. MSPMA(Maximum Sales Probability Matching Algorithm)

2020년 6월 1일의 시간대별 상품 취급액 예측 후 MSPMA로 새로운 편성 확인

from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive

import pandas as pd
import numpy as np
import random
import sys
import re
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import RobustScaler

from keras.layers import Dense, Activation, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, GlobalMaxPooling1D, Conv1DTranspose, concatenate, Input, add, Dropout, BatchNormalization
from tensorflow.keras import Model

/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

# 숫자 표기법 변환
pd.set_option('display.float_format', '{:.2f}'.format)

# 한글 깨짐 방지
plt.rc('font', family='Malgun Gothic')

# 도화지 크기 설정
plt.rcParams['figure.figsize'] = (8, 5)

1. 테스트 데이터의 6월 1일만 추출하여 데이터 셋 만들기

total_test = pd.read_csv('/content/drive/Shared drives/빅콘테스트2020/total_test.csv')

# 테스트 데이터에서 2020년 6월 1일의 row는 06시부터 02시(다음날이지만 한 싸이클로 포함)까지 79개
june_1st = total_test.iloc[:79, :]

# 표시된 것 이외에는 1시간에 3개 row씩 존재
product1 = june_1st.iloc[:3,:]
product2 = june_1st.iloc[3:6,:]
product3 = june_1st.iloc[6:9,:]
product4 = june_1st.iloc[9:12,:]
product5 = june_1st.iloc[12:15,:]
product6 = june_1st.iloc[15:18,:]
product7 = june_1st.iloc[18:21,:]
product8 = june_1st.iloc[21:27,:] # 6개
product9 = june_1st.iloc[27:28,:] # 1개
product10 = june_1st.iloc[28:31,:]
product11 = june_1st.iloc[31:34,:]
product12 = june_1st.iloc[34:40,:] # 6개
product13 = june_1st.iloc[40:43,:]
product14 = june_1st.iloc[43:46,:]
product15 = june_1st.iloc[46:49,:]
product16 = june_1st.iloc[49:58,:] # 9개
product17 = june_1st.iloc[58:67,:] # 9개
product18 = june_1st.iloc[67:73,:] # 6개
product19 = june_1st.iloc[73:76,:]
product20 = june_1st.iloc[76:79,:]

# 각 시간대별 예측을 위해 시간 변수를 바꿔서 늘린 테스트 데이터의 마더코드별 데이터셋 구축
product1_total = pd.DataFrame()
product2_total = pd.DataFrame()
product3_total = pd.DataFrame()
product4_total = pd.DataFrame()
product5_total = pd.DataFrame()
product6_total = pd.DataFrame()
product7_total = pd.DataFrame()
product8_total = pd.DataFrame()
product9_total = pd.DataFrame()
product10_total = pd.DataFrame()
product11_total = pd.DataFrame()
product12_total = pd.DataFrame()
product13_total = pd.DataFrame()
product14_total = pd.DataFrame()
product15_total = pd.DataFrame()
product16_total = pd.DataFrame()
product17_total = pd.DataFrame()
product18_total = pd.DataFrame()
product19_total = pd.DataFrame()
product20_total = pd.DataFrame()

# product1
for i in range(20):
  temp = product1.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product1_total = product1_total.append(temp)
  product1_total = product1_total.reset_index(drop=True)

# product2
for i in range(20):
  temp = product2.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product2_total = product2_total.append(temp)
  product2_total = product2_total.reset_index(drop=True)

# product3
for i in range(20):
  temp = product3.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product3_total = product3_total.append(temp)
  product3_total = product3_total.reset_index(drop=True)

# product4
for i in range(20):
  temp = product4.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product4_total = product4_total.append(temp)
  product4_total = product4_total.reset_index(drop=True)

# product5
for i in range(20):
  temp = product5.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product5_total = product5_total.append(temp)
  product5_total = product5_total.reset_index(drop=True)

# product6
for i in range(20):
  temp = product6.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product6_total = product6_total.append(temp)
  product6_total = product6_total.reset_index(drop=True)

# product7
for i in range(20):
  temp = product7.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product7_total = product7_total.append(temp)
  product7_total = product7_total.reset_index(drop=True)

# product8
for i in range(20):
  temp = product8.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+7)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 6
  product8_total = product8_total.append(temp)
  product8_total = product8_total.reset_index(drop=True)

# product9
for i in range(20):
  temp = product9.copy()
  temp['시'] = [(i+6)%24]
  temp['시간대'] = [str((i + 6)%24) + '시']
  product9_total = product9_total.append(temp)
  product9_total = product9_total.reset_index(drop=True)

# product10
for i in range(20):
  temp = product10.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product10_total = product10_total.append(temp)
  product10_total = product10_total.reset_index(drop=True)

# product11
for i in range(20):
  temp = product11.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product11_total = product11_total.append(temp)
  product11_total = product11_total.reset_index(drop=True)

# product12
for i in range(20):
  temp = product12.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+7)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 6
  product12_total = product12_total.append(temp)
  product12_total = product12_total.reset_index(drop=True)

# product13
for i in range(20):
  temp = product13.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product13_total = product13_total.append(temp)
  product13_total = product13_total.reset_index(drop=True)

# product14
for i in range(20):
  temp = product14.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product14_total = product14_total.append(temp)
  product14_total = product14_total.reset_index(drop=True)

# product15
for i in range(20):
  temp = product15.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product15_total = product15_total.append(temp)
  product15_total = product15_total.reset_index(drop=True)

# product16
for i in range(20):
  temp = product16.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+7)%24, (i+7)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 9
  product16_total = product16_total.append(temp)
  product16_total = product16_total.reset_index(drop=True)

# product17
for i in range(20):
  temp = product17.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+7)%24, (i+7)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 9
  product17_total = product17_total.append(temp)
  product17_total = product17_total.reset_index(drop=True)

# product18
for i in range(20):
  temp = product18.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+6)%24, (i+6)%24, (i+7)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 6
  product18_total = product18_total.append(temp)
  product18_total = product18_total.reset_index(drop=True)

# product19
for i in range(20):
  temp = product19.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product19_total = product19_total.append(temp)
  product19_total = product19_total.reset_index(drop=True)

# product20
for i in range(20):
  temp = product20.copy()
  temp['시'] = [(i+6)%24, (i+6)%24, (i+7)%24]
  temp['시간대'] = [str((i + 6)%24) + '시'] * 3
  product20_total = product20_total.append(temp)
  product20_total = product20_total.reset_index(drop=True)

augmented_test = pd.concat([product1_total, product2_total, product3_total, product4_total, product5_total,
                            product6_total, product7_total, product8_total, product9_total, product10_total,
                            product11_total, product12_total, product13_total, product14_total, product15_total,
                            product16_total, product17_total, product18_total, product19_total, product20_total], axis=0)

# 사용할 변수 리스트
col = ['emb_label_주문량', 'emb_label2_주문량', 'emb_label3_주문량', '월_주문량', '요일_주문량', '시등급_주문량', '분등급_주문량','노출횟수_주문량', '판매단가',
       'emb_label','emb_label2','emb_label3', '시', '분' ,'월', '요일','노출횟수']

# 사용할 변수 리스트에 임베딩 컬럼 추가
col.extend([str(x) for x in range(159)])
len(col)

augmented_test_fit = augmented_test[col]

scaler = RobustScaler()
scaler.fit(augmented_test_fit)
augmented_test_final = pd.DataFrame(scaler.transform(augmented_test_fit), columns=augmented_test_fit.columns)

2. 모델에 train으로 학습된 weight를 넣고 합쳐진 test 데이터 prediction

def build_model(input_layer, start_neurons):
    
    conv1 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same", kernel_initializer='he_normal', input_shape=(augmented_test_final.shape[1], 1))(input_layer)
    conv1 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same", kernel_initializer='he_normal')(conv1)
    pool1 = BatchNormalization()(conv1)
    pool1 = MaxPooling1D(2)(pool1)
    pool1 = Dropout(0.1)(pool1)

    conv2 = Conv1D(start_neurons * 2, 3, activation="elu", padding="same", kernel_initializer='he_normal')(pool1)
    conv2 = Conv1D(start_neurons * 2, 3, activation="elu", padding="same", kernel_initializer='he_normal')(conv2)
    pool2 = BatchNormalization()(conv2)
    pool2 = MaxPooling1D(2)(pool2)
    pool2 = Dropout(0.1)(pool2)

    conv3 = Conv1D(start_neurons * 2, 3, activation="elu", padding="same", kernel_initializer='he_normal')(pool2)
    conv3 = Conv1D(start_neurons * 2, 3, activation="elu", padding="same", kernel_initializer='he_normal')(conv3)
    pool3 = BatchNormalization()(conv3)
    pool3 = AveragePooling1D(2)(pool3)
    pool3 = Dropout(0.1)(pool3)


    convm = Conv1D(start_neurons * 4, 3, activation="elu", padding="same",kernel_initializer='he_normal')(pool3)

    deconv3 = Conv1DTranspose(start_neurons * 1, 3, strides=2, padding="same",kernel_initializer='he_normal')(convm)
    uconv3 = concatenate([deconv3, conv3])
    uconv3 = Dropout(0.1)(uconv3)
    uconv3 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv3)
    uconv3 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv3)
    uconv3 = BatchNormalization()(uconv3)
    uconv3 = Dropout(0.1)(uconv3)

    deconv2 = Conv1DTranspose(start_neurons * 1, 3, strides=2, padding="same",kernel_initializer='he_normal')(uconv3)
    uconv2 = concatenate([deconv2, conv2])
    uconv2 = Dropout(0.1)(uconv2)
    uconv2 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv2)
    uconv2 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv2)
    uconv2 = BatchNormalization()(uconv2)
    uconv2 = Dropout(0.1)(uconv2)

    deconv1 = Conv1DTranspose(start_neurons * 1, 3, strides=2, padding="same",kernel_initializer='he_normal')(uconv2)
    uconv1 = concatenate([deconv1, conv1])
    uconv1 = Dropout(0.1)(uconv1)
    uconv1 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv1)
    uconv1 = Conv1D(start_neurons * 1, 3, activation="elu", padding="same",kernel_initializer='he_normal')(uconv1)
    uconv1 = BatchNormalization()(uconv1)
    uconv1 = Dropout(0.1)(uconv1)

    flat = Flatten()(uconv1)
    dense1 = Dense(256, activation='elu')(flat)
    dense2 = Dense(128, activation='elu')(dense1)
    output_layer = Dense(1)(dense2)
    
    return output_layer

# model 생성
input_layer = Input((augmented_test_final.shape[1], 1))
output_layer = build_model(input_layer, 128)
model = Model(input_layer, output_layer)

# model 가중치 불러오기
model.load_weights('/content/drive/Shared drives/빅콘테스트2020/weights/ds/functional_3.44-35.0913.hdf5')

# 수량 예측
y_pred = model.predict(augmented_test_final.values)

y_pred = y_pred.reshape(-1)

3. 테스트 데이터의 판매단가와 예측된 수량을 곱하여 시간대별 취급액 계산

augmented_test['취급액'] = y_pred * augmented_test['판매단가']

# 2020년 6월 1일 각 상품의 시간대별 예상 취급액
mat = pd.pivot_table(augmented_test, index='마더코드', values='취급액', aggfunc='sum', columns='시간대')

sorted_cols = ['6시', '7시', '8시', '9시', '10시', '11시', '12시', '13시', '14시', '15시',
               '16시', '17시', '18시', '19시', '20시', '21시', '22시', '23시', '0시', '1시']

mat = mat[sorted_cols]

mat

시간대	6시	7시	8시	9시	10시	11시	12시	13시	14시	15시	16시	17시	18시	19시	20시	21시	22시	23시	0시	1시
마더코드
100012	58916498.68	60287677.17	61641217.79	62674660.48	63148839.87	63193057.50	62861731.05	62229824.30	61438704.87	60873860.99	60526567.97	60120257.77	59603198.79	59109913.69	58808172.59	58549890.24	58443557.48	52142905.49	53739720.47	54105199.32
100080	42652047.08	42970121.63	43666148.84	44976049.03	48548156.18	61126510.67	73266923.10	79114600.46	81624116.93	82458607.23	82118211.58	80976277.57	78914798.93	77018322.37	76040012.41	75678364.23	76032187.91	70523072.41	49785379.02	48175683.56
100148	185145973.22	180336217.41	174860482.96	167778606.00	158035734.75	145553533.81	131902052.96	120102252.09	111777401.54	103849914.15	96138615.72	88409898.77	81165384.75	77443930.87	77423945.02	80572644.01	86237346.59	122677394.84	228891067.62	220155730.78
100150	173788767.40	172738844.67	171254144.88	170248768.90	169294587.37	167296978.25	163249081.93	157810567.96	153697031.93	151154194.19	149049005.42	147403128.76	145845407.37	144940826.06	144642736.58	144657162.79	145173836.47	150836665.09	176814826.80	176304118.11
100205	28226509.34	28238176.09	28201691.42	28259724.45	28596455.75	29337910.06	30476169.31	32054912.64	33166910.40	33751216.56	34020670.60	34010987.85	33924703.66	33878473.95	33834178.50	33851773.20	34039954.67	28336878.91	27485001.80	27730118.86
100320	7938956.02	8117945.50	9059131.20	11625453.95	18760765.06	27099477.40	29725222.52	30840627.94	31704253.14	31994486.31	32081974.21	32486049.27	32742214.94	32531301.36	32246511.56	32009179.59	31876124.74	28074611.63	9180502.25	8931297.62
100348	36763874.52	36804761.46	37363498.79	40119459.07	49780673.91	59612440.52	60546210.00	58877353.01	57423587.98	55904948.04	54617023.09	53434339.93	52222687.49	50999411.69	49859578.74	48985449.68	48263319.06	48675752.17	41825978.41	40884313.77
100362	26368636.70	27728388.59	30624623.51	33663631.70	35949713.70	37114529.35	35365865.70	32464243.28	30342646.50	29245008.68	28710407.98	28285291.80	27913946.48	27670257.31	27716175.55	27885393.78	28072919.39	26859061.01	25490376.43	25236320.78
100381	34546547.21	33980521.36	34216914.78	36550405.47	43166371.95	50853467.15	52540481.29	51217258.47	50582446.70	51149911.18	51478257.13	51414595.54	51445932.98	51902137.77	52812497.44	53995163.13	55236354.39	50114881.42	40770344.10	39835769.91
100383	25795988.67	26308476.19	26836003.31	27334293.11	27774157.15	28006225.70	27873563.21	27587825.08	27239688.78	26959204.11	26615222.65	26251439.22	25925597.16	25644643.99	25377833.96	25131644.51	24954059.76	22525384.12	22741750.90	23179304.79
100416	14327550.22	14867988.34	15525866.19	16212338.76	16768610.61	17210767.53	17384077.67	17382776.61	17351670.19	17399072.14	17569929.93	17662096.40	17607717.00	17504066.26	17526958.10	17614586.69	17746300.34	14070753.33	12377328.13	12486886.61
100445	77066930.11	78113096.41	79281101.35	80374387.44	80414045.36	79388920.73	77965382.32	76888314.79	76193033.99	75997634.82	76073238.94	76199455.92	76540871.17	76762194.86	77066121.63	77503805.11	78022018.21	73733366.94	74841546.88	74679109.59
100501	63069155.23	67056032.59	72073387.64	77652225.77	82527294.41	85507336.40	86637820.91	86496734.21	86553635.22	85724530.95	84753991.04	84046419.11	83563565.87	83172484.45	83124369.28	83421136.71	83905492.47	72376527.08	54077488.01	54548861.85
100526	22544505.15	23836771.12	26709743.81	30292506.59	35255691.88	41581845.46	44553146.23	45773559.00	47273175.80	48664078.64	49266007.94	49423545.03	49745600.04	50150705.70	50419568.10	50437103.01	50349844.49	31991635.90	23335178.27	22642427.85
100537	46702416.06	47820613.32	49341207.21	50917615.20	53001099.49	54431675.46	53108927.39	50694715.59	49047923.70	48525274.86	48751755.58	48911946.14	48797552.66	48810764.18	48923133.92	48920405.16	48990596.68	49282686.89	45926297.86	45815307.72
100554	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00
100555	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00
100570	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00
100638	61044321.01	61661966.61	62384578.93	63938272.18	66774916.68	70261121.25	71205205.51	72195166.05	73025397.90	74372216.35	75306994.13	75944766.21	76619282.71	77876457.57	79480489.47	81270224.37	82978546.89	67504282.46	62492017.18	61009966.62
100650	11000853.57	11253162.19	11700692.92	12590580.85	15015186.12	18397997.96	19520179.60	19656245.67	19542662.08	19167662.90	18825558.01	18366565.68	17836036.87	17157308.10	16643809.72	16234409.53	16004050.28	15791447.80	11563562.66	11412800.48

4. MSPMA(Maximum Sales Probability Matching Algorithm) 적용하여 각 상품별 최적 시간대 찾기

# maxtrix 내 선택을 위한 빈 slot 데이터프레임 만들기
slot = mat.copy()
slot.iloc[:, :] = 0

# 두 개의 취급액의 한 시간대를 놓고 경합할 경우의 확률적 선택 계산
def probability_matching(column, prod1, prod2):
    sale1 = mat.iloc[prod1, column] + 1e-15
    sale2 = mat.iloc[prod2, column] + 1e-15
    prob_sale1 = round(sale1 / (sale1 + sale2), 2)
    prob_sale2 = round(sale2 / (sale1 + sale2), 2)
    package = [prod1, prod2]
    return random.choices(package, weights=[prob_sale1, prob_sale2])[0]

# MSPMA 알고리즘
def mspma(now_row, max_col):
    # 현재 상태의 값을 slot에 표시
    slot.iloc[now_row, max_col] = 1
    
    # slot 테이블에서 max_col에 로우 두 개가 1로 된 경우
    temp = np.array(slot.iloc[:, max_col])
    if len(temp[temp == 1]) == 2:
        
        # now_row외에 다른 1의 row위치를 찾기
        temp_col = list(slot.iloc[:, max_col])
        temp_col[now_row] = -1
        compare_row = np.where(np.array(temp_col) == 1)[0][0]
        
        # 확률 매칭 함수 적용
        result = probability_matching(max_col, now_row, compare_row)
        
        # 비교 대상(compare_row)이 선택되는 경우 (now_row의 max_col 위치가 바뀌어야 하는 상황)
        if result == compare_row:
            # now_row기존의 값을 -1로 대체
            slot.iloc[now_row, max_col] = -1
            mat.iloc[now_row, max_col] = -1
            
            # now_row에 0만 있는 것들 중 가장 큰 것의 인덱스 찾기
            max_col = np.argmax(mat.iloc[now_row, :])

            # slot에 현재 now_row에서 가장 큰 값이라고 표시
            slot.iloc[now_row, max_col] = 1
            
            # 재귀 함수
            mspma(now_row, max_col)

        # 현재 상품(now_row)이 선택되는 경우 (compare_row의 max_col 위치가 바뀌어야 하는 상황)
        else:
            # compare_row기존의 값을 -1로 대체
            slot.iloc[compare_row, max_col] = -1
            mat.iloc[compare_row, max_col] = -1
            
            # compare_row에 0만 있는 것들 중 가장 큰 것의 인덱스 찾기
            max_col = np.argmax(mat.iloc[now_row, :])
            
            # slot에 현재 compare_row에서 가장 큰 값이라고 표시
            slot.iloc[compare_row, max_col] = 1
            
            # 재귀함수
            mspma(compare_row, max_col)

# 최종 구현
start_row = 0

for i in range(20):
    new_col = np.argmax(mat.iloc[start_row, :])   
    mspma(start_row, new_col)
    start_row += 1

def isHangul(text):
    pyVer3 =  sys.version_info >= (3, 0)

    if pyVer3 : # for Ver 3 or later
        encText = text
    else: # for Ver 2.x
        if type(text) is not unicode:
            encText = text.decode('utf-8')
        else:
            encText = text

    hanCount = len(re.findall(u'[\u3130-\u318F\uAC00-\uD7A3]+', encText))
    return hanCount > 0

for i in slot.columns:
    mother_code = slot[i][slot[i] == 1].index[0]
    name = june_1st[june_1st['마더코드'] == mother_code]['상품명']
    
    cnt = 0
    temp = set(name.values)
    for j in temp:
      for k in range(len(j)):
            temp2 = j[k]
            if isHangul(temp2):
                cnt += 2
            else:
                cnt += 1
            
    print('*')
    if len(i) > 2:
        print('--------  ', '------------------', ' '+'-'*cnt+'--'+'-'*(4*len(temp)-1)+'-------')
        print('|', i+' |','-> 마더코드:', mother_code,'| 상품:', set(name.values),'|')
        print('--------  ', '------------------', ' '+'-'*cnt+'--'+'-'*(4*len(temp)-1)+'-------')
    else:
        print('-------  ', '------------------', ' '+'-'*cnt+'--'+'-'*(4*len(temp)-1)+'------')
        print('|', i+' |','-> 마더코드:', mother_code,'| 상품:', str(temp)+'|')
        print('-------  ', '------------------', ' '+'-'*cnt+'--'+'-'*(4*len(temp)-1)+'------')

*
-------   ------------------  --------------------------------------------
| 6시 | -> 마더코드: 100554 | 상품: {'DB손해보험 참좋은운전자보험(1912)'}|
-------   ------------------  --------------------------------------------
*
-------   ------------------  ------------------------------------------------
| 7시 | -> 마더코드: 100555 | 상품: {'디비손보 아임오케이 암보험(20.05런칭)'}|
-------   ------------------  ------------------------------------------------
*
-------   ------------------  -----------------------------------------------
| 8시 | -> 마더코드: 100362 | 상품: {'에이유플러스 슈퍼선스틱 1004(최저가)'}|
-------   ------------------  -----------------------------------------------
*
-------   ------------------  -----------------------------------------------
| 9시 | -> 마더코드: 100445 | 상품: {'쿠미투니카 쿨 레이시 란쥬쉐이퍼&팬티'}|
-------   ------------------  -----------------------------------------------
*
--------   ------------------  ---------------------------------
| 10시 | -> 마더코드: 100012 | 상품: {'AAC 삼채포기김치 10kg'} |
--------   ------------------  ---------------------------------
*
--------   ------------------  ----------------------------------------
| 11시 | -> 마더코드: 100537 | 상품: {'[기간]제주바다자연산돔39마리'} |
--------   ------------------  ----------------------------------------
*
--------   ------------------  -------------------------------------
| 12시 | -> 마더코드: 100650 | 상품: {'잭필드 남성  반팔셔츠 4종'} |
--------   ------------------  -------------------------------------
*
--------   ------------------  -------------------------------------------------------------------------------------------------------------------------
| 13시 | -> 마더코드: 100383 | 상품: {'한라궁 황칠 제주오메기떡 2종 40개 (호박오메기20봉+팥오메기20봉)', '한라궁 황칠 제주오메기떡 40개 (60g * 40봉)'} |
--------   ------------------  -------------------------------------------------------------------------------------------------------------------------
*
--------   ------------------  ----------------------------------------------------------
| 14시 | -> 마더코드: 100501 | 상품: {'파격가 노비타 스마트 비데 무료설치(변기세정제)'} |
--------   ------------------  ----------------------------------------------------------
*
--------   ------------------  ---------------------------------------------
| 15시 | -> 마더코드: 100348 | 상품: {'벨레즈온 심리스 원피스 4종 패키지'} |
--------   ------------------  ---------------------------------------------
*
--------   ------------------  --------------------------------------------------------------------------------------------------------------------------------------------------------
| 16시 | -> 마더코드: 100205 | 상품: {'보루네오 델루나 유로탑 슬라이딩 LED침대 슈퍼싱글', '보루네오 델루나 유로탑 슬라이딩 LED침대 퀸', '보루네오 델루나 유로탑 슬라이딩 LED침대 킹'} |
--------   ------------------  --------------------------------------------------------------------------------------------------------------------------------------------------------
*
--------   ------------------  -----------------------------------------
| 17시 | -> 마더코드: 100416 | 상품: {'코몽트 남성 티셔츠 8종(시즌1)'} |
--------   ------------------  -----------------------------------------
*
--------   ------------------  ------------------------------------------------------------
| 18시 | -> 마더코드: 100080 | 상품: {'아키 라이크라 릴렉스 보정브라 패키지(뉴아키28차)'} |
--------   ------------------  ------------------------------------------------------------
*
--------   ------------------  ------------------------------------------------------------------
| 19시 | -> 마더코드: 100320 | 상품: {'올바로 문어발 샤워기 1세트', '올바로 문어발 샤워기 1+1'} |
--------   ------------------  ------------------------------------------------------------------
*
--------   ------------------  ----------------------------------------
| 20시 | -> 마더코드: 100381 | 상품: {'바비리스 퍼펙트 볼륨스타일러'} |
--------   ------------------  ----------------------------------------
*
--------   ------------------  -------------------------------------------------------------
| 21시 | -> 마더코드: 100526 | 상품: {'소노비 줄리엣 소가죽 토트백 1종+투숄더 버킷백 1종'} |
--------   ------------------  -------------------------------------------------------------
*
--------   ------------------  -------------------------------
| 22시 | -> 마더코드: 100638 | 상품: {'램프쿡 자동회전냄비'} |
--------   ------------------  -------------------------------
*
--------   ------------------  ---------------------------------------------------------------------
| 23시 | -> 마더코드: 100570 | 상품: {'(특)KT휴대폰_삼성갤럭시 A31', 'KT휴대폰_삼성갤럭시 노트10'} |
--------   ------------------  ---------------------------------------------------------------------
*
-------   ------------------  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| 0시 | -> 마더코드: 100148 | 상품: {'LG 울트라HD TV AI ThinQ(인공지능 씽큐) 65형 65UN7850KNA', 'LG 울트라HD TV AI ThinQ(인공지능 씽큐) 55형 55UN7850KNA', 'LG 울트라HD TV AI ThinQ(인공지능 씽큐) 75형 75UN7850KNA'}|
-------   ------------------  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
*
-------   ------------------  -----------------------------------------------
| 1시 | -> 마더코드: 100150 | 상품: {'LG전자 통돌이 세탁기 TR14WK1(화이트)'}|
-------   ------------------  -----------------------------------------------

* 참고: 기존의 편성표 취급액 합과 MSPMA 적용 후 편성표 취급액 합 비교

기존의 취급액 합

sclr = RobustScaler()
sclr.fit(june_1st[col])
test_scaled = pd.DataFrame(sclr.transform(june_1st[col]), columns=june_1st[col].columns)

# model 생성
input_layer = Input((test_scaled.shape[1], 1))
output_layer = build_model(input_layer, 128)
model = Model(input_layer, output_layer)

# model 가중치 불러오기
model.load_weights('/content/drive/Shared drives/빅콘테스트2020/weights/ds/functional_3.44-35.0913.hdf5')

# 수량 예측
y_pred = model.predict(test_scaled.values)

y_pred = y_pred.reshape(-1)

sum(june_1st['판매단가'] * y_pred)

862979925.8163452

MSPMA 이후 취급액 합

new_sum = 0

for i in slot.columns:
    mother_code = slot[i][slot[i] == 1].index[0]
    new_sum += mat[i][mother_code]

new_sum

1172698313.564705

기존과 MSPMA 적용 후의 취급액 합 차이 비교

df = pd.DataFrame({'before': [sum(june_1st['판매단가'] * y_pred)], 'after':[new_sum]}).T
df.columns= ['취급액']
df.reset_index(inplace=True)

sns.barplot(x="index", y="취급액", data=df)

<matplotlib.axes._subplots.AxesSubplot at 0x7f9004dae828>



findfont: Font family ['Malgun Gothic'] not found. Falling back to DejaVu Sans.
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 52712 missing from current font.
  font.set_text(s, 0.0, flags=flags)
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 44553 missing from current font.
  font.set_text(s, 0.0, flags=flags)
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:214: RuntimeWarning: Glyph 50529 missing from current font.
  font.set_text(s, 0.0, flags=flags)
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 52712 missing from current font.
  font.set_text(s, 0, flags=flags)
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 44553 missing from current font.
  font.set_text(s, 0, flags=flags)
/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py:183: RuntimeWarning: Glyph 50529 missing from current font.
  font.set_text(s, 0, flags=flags)

png

2. MSPMA(Maximum Sales Probability Matching Algorithm)

2020년 6월 1일의 시간대별 상품 취급액 예측 후 MSPMA로 새로운 편성 확인

1. 테스트 데이터의 6월 1일만 추출하여 데이터 셋 만들기

2. 모델에 train으로 학습된 weight를 넣고 합쳐진 test 데이터 prediction

3. 테스트 데이터의 판매단가와 예측된 수량을 곱하여 시간대별 취급액 계산

4. MSPMA(Maximum Sales Probability Matching Algorithm) 적용하여 각 상품별 최적 시간대 찾기

* 참고: 기존의 편성표 취급액 합과 MSPMA 적용 후 편성표 취급액 합 비교

Related Posts

Codibook Recommender System Project - 2. Image Processing 28 Nov 2021

Codibook Recommender System Project - 1. Web Scraping 28 Nov 2021

Pytorch Computer Vision Ch02 06 Sep 2021