#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# seniority_list is an analytical tool used when seniority-based work
# groups merge. It brings modern data science to the area of labor
# integration, utilizing the powerful data analysis capabilities of Python
# scientific computing.
# Copyright (C) 2016-2017 Robert E. Davison, Ruby Data Systems Inc.
# Please direct inquires to: rubydatasystems@fastmail.net
# This program is free software: you can redistribute it and/or modiffy
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
'''bokeh_editor.py
EDITOR TOOL
requires bokeh 0.12.13+ - uses bokeh server
'''
import numpy as np
import pandas as pd
import os
import sys
import pickle
from functools import partial
from collections import OrderedDict as od
from types import SimpleNamespace as sn
import scipy.stats as st
from scipy.signal import savgol_filter as sf
from numpy.polynomial import Polynomial as poly
from bokeh.plotting import figure
from bokeh.layouts import column, row, layout
from bokeh.models import ColumnDataSource, DataRange1d, \
Span, Panel, Tabs, Label, NumeralTickFormatter, \
DatetimeTickFormatter, HoverTool, CrosshairTool
from bokeh.models.layouts import Spacer
from bokeh.models.widgets import Slider, Button, Select, \
RangeSlider, TextInput, CheckboxGroup
from bokeh.models.glyphs import Line
from bokeh.models.annotations import BoxAnnotation
from bokeh.transform import jitter
import functions as f
from matplotlib_charting import filter_ds
[docs]class Data():
def __init__(self, data=None):
self.data = data
[docs] def update_data(self, d):
self.data = d
[docs]class PropOrder():
def __init__(self, list_order=None, name=None):
self.list_order = list_order
self.name = name
[docs] def update_order(self, new_order):
self.list_order = new_order
[docs] def update_name(self, new_name):
self.name = new_name
[docs]class Kwargs():
def __init__(self, kdict=None):
self.kdict = kdict
if self.kdict is None:
self.kdict = {}
[docs] def update(self, other_dict):
self.kdict.update(other_dict)
[docs] def add(self, key, value):
self.kdict[key] = value
[docs] def remove(self, key):
self.kdict.pop(key)
[docs] def clear(self):
self.kdict.clear()
[docs]def editor(doc,
poly_dim=15,
ema_len=25,
savgol_window=35,
savgol_fit=1,
animate_speed=350,
plot_width=1100,
plot_height=500,
strip_eg_height=50,
start_dot_size=4.75,
max_dot_size=25,
start_marker_alpha=.65,
marker_edge_color=None,
marker_edge_width=0.0):
'''create the editor tool
use the following code to run within the notebook:
.. code:: python
import editor_function as ef
from functools import partial
from bokeh.io import show, output_notebook
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
output_notebook()
handler = FunctionHandler(partial(ef.editor,
# optional kwargs,
))
app = Application(handler)
show(app)
inputs
doc (variable)
a variable representing the bokeh document, do not modify
poly_dim (integer)
the order of the polynomial fit line
ema_len (integer)
the smoothing length to use when constructing the exponential
moving average line
savgol_window (positive odd integer)
Savitzky-Golay filter window length
savgol_fit (integer)
The order of the polynomial used to fit the samples.
This value must be less than the savgol_window value.
animate_speed (integer)
Number of milliseconds between each animated month display
plot_width (integer)
width of main and density charts in pixels
plot_height (integer)
height of main chart in pixels
strip_eg_height (integer)
height alloted for each employee group when constructing
the density chart
start_dot_size (float)
initial scatter marker size for main chart
max_dot_size (integer)
maximum scatter marker size for the main chart display, set
to size sliders
start_marker_alpha (float)
initial scatter marker alpha (transparency) for main chart
display
marker_edge_color (color value string or None)
color of scatter marker edge color for main chart when
marker edge width value is greater than zero
marker_edge_width (float)
width of scatter marker edge width when marker_edge_color is
not None
'''
# ------START variable assignment------------------------------
try:
settings_dict = pd.read_pickle('dill/dict_settings.pkl')
color_dict = pd.read_pickle('dill/dict_color.pkl')
except OSError:
print('dict_settings.pkl and/or dict_color.pkl not found ' +
'\nperhaps run build_program_files.py?')
# the editor dictionary contains values representing the current state
# of widget values and other variables.
# The values are stored as a pickled dictionary file between sessions.
# The editor dictionary is converted to a SimpleNamespace object
# for use within the routine.
# This way dot notation and global access is provided.
# sn is the alias for SimpleNamespace
ed = sn(**pd.read_pickle('dill/editor_dict.pkl'))
# grab proposal names for sel_base and sel_proposal dropdowns
p_list = list(pd.read_pickle('dill/proposal_names.pkl').proposals.values)
# limit proposal names to 10 characters to maintain layout integrity
p_list = [x[:10] for x in p_list]
# add hybrid if a hybrid dataset exists
if os.path.exists('dill/ds_hybrid.pkl'):
p_list.append('hybrid')
# make a list for baseline selection (add standalone)
base_p_list = [p for p in p_list if p != 'edit']
base_p_list.append('standalone')
# add edit to p_list
if 'edit' not in p_list:
p_list.append('edit')
max_month = ed.num_of_months
mth_str_list = list(np.arange(0, max_month).astype(str))
# date list for animation label background
date_list = list(pd.date_range(start=settings_dict['starting_date'],
periods=max_month, freq='M'))
date_list = [x.strftime('%Y %b') for x in date_list]
# cover the possibility of rgba values in eg_color_dict values:
eg_cdict = f.convert_to_hex(color_dict['eg_color_dict'])
eg_list = list(eg_cdict.keys())
# used for stripplot source (see callbacks update_scat_size_p2/alpha2)
num_dots = ed.total_count
str_eg_list = [str(eg) for eg in eg_list]
# desc = Div(text=open(os.path.join(os.path.dirname(__file__),
# 'description.html')).read(),
# width=800)
# slider steps for marker size and alpha
size_step = .25
alpha_step = .025
strip_height = len(eg_list) * strip_eg_height
aux_slider_height = 160
aux_slider_width = 22
panel1_width = 460
panel2_width = max(450, 210 + (2 * aux_slider_width * len(eg_list)))
slider_edit_width = plot_width - 80
all_colors = color_list()
alphas = alpha_list()
widths = line_widths()
# layout variables
controls_height = 220
chart_sel_height = 140
but_space_width = 50
but_save_width = 260
sel_height = 40
sel_width = 95
main_but_width = 120
toggle_but_width = 25
toggle_space_width = 20
toggle_center_width = 65
but_height = 35
# squeeze tab
drop_dir_dict = {'u >>': 'u', '<< d': 'd'}
incr_dir_dict = {'u >>': -1, '<< d': 1}
# these items are referenced when datasets are created
# baseline datasets are created and stored with the RUN_SCRIPTS notebook
# edited datasets are created with the editor tool for analysis
cond_dict = {'none': [],
'prex': ['prex'],
'count': ['count'],
'ratio': ['ratio'],
'pc': ['prex', 'count'],
'pr': ['prex', 'ratio'],
'cr': ['count', 'ratio'],
'pcr': ['prex', 'count', 'ratio']}
pcnt_cols = ['spcnt', 'lspcnt']
float_cols = ['jobp', 'mpay', 'cpay', 'ylong', 'mlong', 'age']
date_cols = ['date', 'doh', 'ldate', 'retdate']
no_invert = ['mnum', 'date', 'year', 'retdate', 'doh', 'ldate',
'scale', 's_lmonths', 'age', 'job_count', 'mlong',
'ylong', 'mpay', 'cpay']
p1_tools = 'pan, box_zoom, wheel_zoom, reset, undo, redo, save'
p2_tools = 'wheel_zoom, box_zoom, reset, save'
# Select widget arguments
sel_size_kwargs = {'width': sel_width, 'height': sel_height}
# density tab
aux_slider_kwargs = {'height': aux_slider_height,
'width': aux_slider_width,
'direction': 'rtl',
'orientation': 'vertical',
'tooltips': False,
'show_value': False}
size_alpha_kwargs = {'width': 30,
'height': 30}
# extra filters and display tabs
opers = ['<', '<=', '==', '!=', '>=', '>']
opers2 = opers + ['']
# extra filters options
attr_list = ['', 'cat_order', 'jobp', 'jnum', 'mnum', 'eg',
'date', 'ldate', 'doh', 'retdate', 'ylong', 'mlong',
'sg', 'age', 'scale', 's_lmonths',
'lnum', 'snum', 'mnum', 'rank_in_job',
'mpay', 'cpay']
# add or remove keys and values here for hover selection generation
hdict = {0: ('lname', '@lname'),
1: ('empkey', '@empkey'),
2: ('ldate', '@ldate{%F}'),
3: ('retdate', '@retdate{%F}'),
4: ('spcnt', '@spcnt{.000}'),
5: ('ylong', '@ylong{0.00}'),
6: ('age', '@age{0.0}')}
# default string for tooltip formatting
# the tuples from the dictionary above are added as appropriate for
# proper hover names and value formatting further in the routine
html_str = ('<div>' +
'<span style=' +
'"font-size: 13px; font-weight: bold; ' +
'color: @c;">%s:</span>' +
'<span style="font-size: 13px;">%s</span>' +
'</div>')
# display attribute options
display_attrs = ['jobp', 'cat_order', 'spcnt', 'lspcnt',
'jnum', 'mpay', 'cpay', 'snum', 'lnum',
'ylong', 'mlong', 'age', 's_lmonths',
'ldate', 'doh']
# size_alpha tab vars
sl_size_dict = {}
sl_alpha_dict = {}
slider_list = []
# plot_note label and calc_note label arguments
note_kwargs = dict(x=40, y=40, x_units='screen',
y_units='screen',
border_line_color='black',
border_line_alpha=.5,
background_fill_alpha=1.0,
text_font_size='15pt',
visible=False)
plot_kwargs = dict(text='..filtering data... ',
background_fill_color='#ffcc80',
**note_kwargs)
calc_kwargs = dict(text='..calculating new dataset... ',
background_fill_color='#99ddff',
**note_kwargs)
# ------END variable assignment---------------------------------
# ------START widget declarations-------------------------------
# squeeze tab
sel_sqz_type = Select(options=['log', 'slide'],
value=ed.sel_sqz_type,
title='sqz type',
**sel_size_kwargs)
sel_emp_grp = Select(options=str_eg_list,
value=ed.sel_emp_grp,
title='emp group',
**sel_size_kwargs)
sel_sqz_dir = Select(options=['u >>', '<< d'],
value=ed.sel_sqz_dir,
title='sqz dir',
**sel_size_kwargs)
slider_squeeze = Slider(start=1, end=400,
value=ed.slider_squeeze,
step=1,
title='squeeze',
width=450, height=40,
bar_color='#ffe6cc')
but_0add = Button(label='<', width=toggle_but_width)
but_0sub = Button(label='>', width=toggle_but_width)
but_squeeze = Button(label='SQUEEZE', width=main_but_width,
height=but_height, button_type='success')
but_1add = Button(label='<', width=toggle_but_width)
but_1sub = Button(label='>', width=toggle_but_width)
# extra filters tab
sel_filt1 = Select(options=attr_list,
value=ed.sel_filt1,
title='Filter 1', width=115, height=sel_height)
sel_filt2 = Select(options=attr_list,
value=ed.sel_filt2,
title='Filter 2', width=115, height=sel_height)
sel_filt3 = Select(options=attr_list,
value=ed.sel_filt3,
title='Filter 3', width=115, height=sel_height)
sel_oper1 = Select(options=opers2,
value=ed.sel_oper1,
title='Oper 1', **sel_size_kwargs)
sel_oper2 = Select(options=opers2,
value=ed.sel_oper2,
title='Oper 2', **sel_size_kwargs)
sel_oper3 = Select(options=opers2,
value=ed.sel_oper3,
title='Oper 3', **sel_size_kwargs)
txt_input1 = TextInput(value=ed.txt_input1,
title='Val 1', height=sel_height, width=145)
txt_input2 = TextInput(value=ed.txt_input2,
title='Val 2', height=sel_height, width=145)
txt_input3 = TextInput(value=ed.txt_input3,
title='Val 3', height=sel_height, width=145)
# animate tab
slider_animate = Slider(start=0, end=max_month - 1,
value=int(ed.sel_mth_num),
step=1, title='Month',
width=350,
orientation='horizontal',
tooltips=False,
show_value=True,
bar_color='#a6a6a6')
but_play = Button(label='► Play', width=90)
but_reset = Button(label='Reset', width=90)
# This commented section is on hold for future development...
# chk_trails = CheckboxGroup(labels=['show_trails'],
# active=ed.chk_trails,
# height=35, width=130, inline=False)
# trails_list = ['all']
# trails_list.extend(mth_str_list)
# sel_trails = Select(options=trails_list,
# value=ed.sel_trails, title='trail_mths',
# width=sel_width, height=sel_height)
but_fwd = Button(label='FWD', width=90)
but_back = Button(label='BACK', width=90)
but_refresh = Button(label='refresh size_alpha',
width=120)
label = Label(x=20, y=plot_height - 150,
x_units='screen', y_units='screen',
text='', text_alpha=.25,
text_color='#b3b3b3',
text_font_size='70pt')
# proposal_save tab
but_save_edit = Button(label='SAVE EDITED DATASET',
button_type='warning',
width=but_save_width)
but_save_order = Button(label='SAVE EDITED ORDER to proposals.xlsx',
button_type='danger',
width=but_save_width)
sel_base = Select(options=base_p_list,
value=ed.sel_base,
title='baseline:',
width=sel_width, height=sel_height + 5)
condition_options = list(cond_dict.keys())
sel_cond = Select(options=condition_options,
value=ed.sel_cond,
title='conditions:',
width=sel_width, height=sel_height + 5)
sel_proposal = Select(options=p_list,
value=ed.sel_proposal,
title='proposal:',
**sel_size_kwargs)
# center column
sel_measure = Select(options=display_attrs,
value=ed.sel_measure,
title='display attr:',
width=sel_width, height=sel_height + 15)
but_calc = Button(label='CALC', width=sel_width + 12,
height=but_height, button_type='primary')
but_plot = Button(label='PLOT', width=sel_width + 12,
height=but_height, button_type='warning')
# display tab
chk_filter = CheckboxGroup(labels=['use extra filters', 'at_retire_only'],
active=ed.chk_filter,
height=35, width=130, inline=False)
sel_mth_oper = Select(options=opers,
value=ed.sel_mth_oper,
title='month oper',
**sel_size_kwargs)
sel_mth_num = Select(options=mth_str_list,
value=ed.sel_mth_num,
title='month num',
**sel_size_kwargs)
chk_display = CheckboxGroup(labels=['scatter', 'poly_fit',
'mean', 'savgol'],
active=ed.chk_display,
height=40,
width=70,
inline=False)
sel_ytype = Select(options=['diff', 'abs'],
value=ed.sel_ytype,
title='ytype',
**sel_size_kwargs)
sel_xtype = Select(options=['prop_s', 'prop_r',
'pcnt_s', 'pcnt_r'],
value=ed.sel_xtype,
title='xtype',
**sel_size_kwargs)
# size_alpha tab:
for eg in eg_list:
sl_size_dict[eg] = Slider(start=.5,
end=max_dot_size,
value=start_dot_size,
step=size_step, title='S',
bar_color=eg_cdict[eg],
**aux_slider_kwargs)
sl_alpha_dict[eg] = Slider(start=0.0, end=1.0,
value=start_marker_alpha,
step=alpha_step, title='A',
bar_color=eg_cdict[eg],
**aux_slider_kwargs)
slider_list.extend([sl_size_dict[eg], sl_alpha_dict[eg]])
but_slider_reset = Button(label='Reset', width=50)
but_slider_big = Button(label='S >', **size_alpha_kwargs)
but_slider_sml = Button(label='< S', **size_alpha_kwargs)
but_slider_aup = Button(label='A >', **size_alpha_kwargs)
but_slider_adn = Button(label='< A', **size_alpha_kwargs)
# grid_bg tab
sel_bgc = Select(options=all_colors,
value=ed.sel_bgc,
title='chart / edit_fill',
width=115, height=sel_height)
sel_gridc = Select(options=all_colors,
value=ed.sel_gridc,
title='grid / edit_line',
width=115, height=sel_height)
sel_bgc_alpha = Select(options=alphas,
value=ed.sel_bgc_alpha,
title='alpha',
width=70, height=sel_height)
sel_gridc_alpha = Select(options=alphas,
value=ed.sel_gridc_alpha,
title='alpha',
width=70, height=sel_height)
but_reset_colors = Button(label='Reset', width=60)
chk_minor_grid = CheckboxGroup(labels=['minor grid lines'],
active=ed.chk_minor_grid)
chk_color_apply = CheckboxGroup(labels=['chart bg/grid',
'edit zone'],
active=ed.chk_color_apply,
height=50)
sel_box_line_width = Select(options=widths,
value=ed.box_line_width,
title='edit_line_width',
width=70, height=sel_height)
# hover tab
chk_hover_on = CheckboxGroup(labels=['hover ON'],
active=ed.chk_hover_on,
width=150)
# get column names from hdict (first value of each tuple)
hover_labels = [val[0] for val in hdict.values()]
chk_hover_sel = CheckboxGroup(labels=hover_labels,
active=ed.chk_hover_sel,
width=120)
# density tab (stripplot):
slider_strip_size = Slider(start=.05, end=15.0,
value=ed.p2_marker_size,
step=.05, title='S',
height=40, width=200,
tooltips=False,
show_value=True,
bar_color='#e6e6e6')
slider_strip_alpha = Slider(start=.025, end=1.0,
value=ed.p2_marker_alpha,
step=.025, title='A',
height=40, width=200,
tooltips=False,
show_value=True,
bar_color='#e6e6e6')
slider_edit_zone = RangeSlider(start=0.0, end=ed.ez_end,
value=(float(ed.x_low), float(ed.x_high)),
step=ed.ez_step,
title='edit range values',
width=slider_edit_width,
bar_color='#a6a6a6', direction='rtl',
show_value=True)
plot_note = Label(**plot_kwargs)
calc_note = Label(**calc_kwargs)
# Spacer Widgets...................
# layout column spacers (between left, center, and right controls)
spacer_controls1 = Spacer(width=50)
spacer_controls2 = Spacer(width=50)
# squeeze tab
spacer_sqz_but2 = Spacer(width=but_space_width)
spacer_sqz_but3 = Spacer(width=but_space_width)
spacer_toggle_1 = Spacer(width=toggle_space_width)
spacer_toggle_center1 = Spacer(width=toggle_center_width)
spacer_toggle_center2 = Spacer(width=toggle_center_width)
spacer_toggle_2 = Spacer(width=toggle_space_width)
# animate tab
spacer_anim1 = Spacer(width=60, height=but_height)
spacer_anim_refresh = Spacer(width=60, height=but_height)
spacer_anim2 = Spacer(width=60, height=but_height)
# proposal_save tab
spacer_top_save = Spacer(width=but_save_width, height=50)
spacer_middle_save = Spacer(width=35, height=aux_slider_height)
# above sel_measure dropdown (center column)
spacer_top_center_col = Spacer(height=40, width=sel_width)
# display tab:
spacer_top_disp = Spacer(width=200, height=45)
spacer_disp_mth1 = Spacer(width=35)
spacer_disp_mth2 = Spacer(width=35)
spacer_disp_ax1 = Spacer(width=35)
spacer_disp_ax2 = Spacer(width=35)
# size_alpha tab
spacer_top_size_alpha = Spacer(width=50, height=50)
spacer_size_buts = Spacer(width=30)
spacer_alpha_buts = Spacer(width=30)
# grid_bg tab
spacer_linesbg_col = Spacer(width=60)
spacer_linesbg_col2 = Spacer(width=5)
spacer_top_color_apply = Spacer(width=70, height=40)
spacer_linesbg_bottom = Spacer(width=75)
# edit zone slider (left margin)
spacer_edit = Spacer(width=40)
# ------END widget declarations---------------------------------
# ------START Class instantiations------------------------------
proposal = PropOrder()
diff_str = Data()
filt_str = Data()
skel = Data()
ds_stand = Data()
base_ds = Data()
calc_ds = Data()
idx_df = Data()
filt_df = Data()
strip_df = Data()
reorder_df = Data()
anim_df = Data()
mgrps_gb = Data()
filt_xax = Data()
idx_xax = Data()
alpha_filt_arr = Data()
eg_filt_arr = Data()
zero_filt_arr = Data()
size_filt_arr = Data()
tool_tips = Data()
hover_tool = Data()
crosshair_tool = Data()
polys = Kwargs()
means = Kwargs()
savgols = Kwargs()
src_dict = Kwargs()
# ------figures, sources, tool classes----------------------------
p1 = figure(min_border_left=50, tools=p1_tools)
p2 = figure(min_border_left=50, width=plot_width, height=strip_height,
x_range=DataRange1d(flipped=True, range_padding=0.0),
y_range=DataRange1d(flipped=True, range_padding=0.05),
tools=p2_tools)
source1 = ColumnDataSource(data=dict(a=[], c=[], s=[], x=[], y=[]))
source2 = ColumnDataSource(data=dict(a=[], c=[], eg=[], s=[], x=[]))
# --------------------------------------------------------------
box_kwargs = dict(fill_alpha=float(ed.box_fill_alpha),
fill_color=ed.box_fill_color,
line_color=ed.box_line_color,
line_alpha=float(ed.box_line_alpha),
line_width=float(ed.box_line_width),
level='underlay',
)
box1 = BoxAnnotation(**box_kwargs.copy())
box2 = BoxAnnotation(**box_kwargs.copy())
# ------polyfit, mean, and savgol smoothing line glyphs-------
# dummy nan dict
nan_dict = dict(x=np.full(1, np.nan), y=np.full(1, np.nan))
# line glyphs arguments
poly_kwargs = dict(x="x", y="y",
line_width=15, line_alpha=0.7)
mean_kwargs = dict(x="x", y="y",
line_width=6, line_alpha=0.7)
savgol_kwargs = dict(x="x", y="y",
line_width=8, line_alpha=0.7)
for eg in eg_list:
# ----make line glyphs------------------------
polys.kdict['p' + str(eg)] = Line(line_color=eg_cdict[eg],
**poly_kwargs)
means.kdict['m' + str(eg)] = Line(line_color=eg_cdict[eg],
**mean_kwargs)
savgols.kdict['s' + str(eg)] = Line(line_color=eg_cdict[eg],
**savgol_kwargs)
# ----line glyphs data source instantiation----
src_dict.kdict['sp' + str(eg)] = \
ColumnDataSource(data=nan_dict.copy())
src_dict.kdict['sm' + str(eg)] = \
ColumnDataSource(data=nan_dict.copy())
src_dict.kdict['ss' + str(eg)] = \
ColumnDataSource(data=nan_dict.copy())
# hover and crosshair tools
hover_tool.data = HoverTool(formatters={'ldate': 'datetime',
'retdate': 'datetime'},
show_arrow=False)
hover_cols = Data()
crosshair_tool.data = CrosshairTool(dimensions='both',
line_alpha=.3,
line_color='red',
line_width=.75)
# ------END Class instantiations------------------------------
# ------START Callback functions------------------------------
# squeeze source
def sqz_type_change(attr, old, new):
ed.sel_sqz_type = new
def emp_group_change(attr, old, new):
ed.sel_emp_grp = new
def sqz_dir_change(attr, old, new):
ed.sel_sqz_dir = new
def update_squeeze(attr, old, new):
ed.slider_squeeze = new
# toggle line adjustment:
def line1_add():
low_slider = ed.x_low
high_slider = ed.x_high
if ed.sel_xtype in ['prop_s', 'prop_r']:
high_slider += 1
else:
if high_slider < 1.0:
high_slider += .001
slider_edit_zone.value = (low_slider, high_slider)
def line1_sub():
low_slider = ed.x_low
high_slider = ed.x_high
if high_slider > low_slider:
if ed.sel_xtype in ['prop_s', 'prop_r']:
high_slider -= 1
else:
high_slider -= .001
slider_edit_zone.value = (low_slider, high_slider)
def line0_add():
low_slider = ed.x_low
high_slider = ed.x_high
if low_slider < high_slider:
if ed.sel_xtype in ['prop_s', 'prop_r']:
low_slider += 1
else:
low_slider += .001
slider_edit_zone.value = (low_slider, high_slider)
def line0_sub():
low_slider = ed.x_low
high_slider = ed.x_high
if ed.sel_xtype in ['prop_s', 'prop_r']:
low_slider -= 1
else:
if low_slider > 0.0:
low_slider -= .001
slider_edit_zone.value = (low_slider, high_slider)
def perform_squeeze(): # make new order for sripplot and/or skeleton
if ed.sel_proposal != 'edit':
sel_proposal.value = 'edit'
squeeze_eg = int(ed.sel_emp_grp)
ed.x_low = slider_edit_zone.value[0]
ed.x_high = slider_edit_zone.value[1]
low_val = f.cross_val(filt_xax.data, ed.x_low, idx_xax.data)
high_val = f.cross_val(filt_xax.data, ed.x_high, idx_xax.data)
if sel_sqz_type.value == 'log':
direction = drop_dir_dict[ed.sel_sqz_dir]
factor = slider_squeeze.value * .005
squeezer = f.squeeze_logrithmic(reorder_df.data,
squeeze_eg,
low_val, high_val,
direction=direction,
put_segment=1,
log_factor=factor)
if sel_sqz_type.value == 'slide':
incr_dir_correction = incr_dir_dict[ed.sel_sqz_dir]
increment = slider_squeeze.value * incr_dir_correction
squeezer = f.squeeze_increment(reorder_df.data,
squeeze_eg,
low_val, high_val,
increment=increment)
strip_df.update_data(reorder_df.data.copy())
strip_df.data['prop_s'] = squeezer
strip_df.data.drop(['new_order'], axis=1, inplace=True)
for col in ['c', 'eg']:
strip_df.data[col] = source2.data[col]
strip_df.data['a'] = ed.p2_marker_alpha
strip_df.data['s'] = ed.p2_marker_size
strip_df.data.sort_values('prop_s', inplace=True)
reorder_df.data['new_order'] = squeezer
reorder_df.data.sort_values('new_order', inplace=True)
reorder_df.data['new_order'] = np.arange(1, len(reorder_df.data) + 1,
dtype='int')
proposal.update_order(reorder_df.data[['new_order']])
update_stripplot()
# extra filters
def update_sel_filt1(attr, old, new):
ed.sel_filt1 = new
def update_sel_filt2(attr, old, new):
ed.sel_filt2 = new
def update_sel_filt3(attr, old, new):
ed.sel_filt3 = new
def update_oper1(attr, old, new):
ed.sel_oper1 = new
def update_oper2(attr, old, new):
ed.sel_oper2 = new
def update_oper3(attr, old, new):
ed.sel_oper3 = new
def update_txt_input1(attr, old, new):
ed.txt_input1 = new
def update_txt_input2(attr, old, new):
ed.txt_input2 = new
def update_txt_input3(attr, old, new):
ed.txt_input3 = new
# animate
def animate_source(attr, old, new):
use_hover = ed.chk_hover_on and ed.chk_hover_sel
if mgrps_gb.data:
hover_dict = {}
# try to find data for selected month group, if none found, stop
try:
mth = mgrps_gb.data.get_group(new)
except:
label.text = 'NO DATA'
return
x = mth[ed.sel_xtype].values
y = mth[ed.sel_ytype].values
c = mth['c'].values
a = mth['a'].values
s = mth['s'].values
eg = mth['eg'].values
s1_dict = {'x': x, 'y': y, 'c': c,
'a': a, 's': s, 'eg': eg}
if use_hover:
for idx in ed.chk_hover_sel:
col = hdict[idx][0]
if col != ed.sel_measure:
hover_dict[col] = mth[col].values
s1_dict.update(hover_dict)
source1.update(data=s1_dict)
label.text = date_list[new]
sel_mth_num.value = str(new)
# reset "running" values for edit zone value conversion using
# the cross_val function (use current month values, not
# the values from the last time the "plot" button was used)
if ed.sel_xtype in ['prop_r', 'pcnt_r']:
filt_xax.data = x
idx_xax.data = mth['prop_s'].values
def animate():
global cb_id
box1.right, box1.left = None, None
if but_play.label == '► Play':
but_play.label = '❚❚ Pause'
cb_id = doc.add_periodic_callback(animate_update, animate_speed)
else:
but_play.label = '► Play'
doc.remove_periodic_callback(cb_id)
def reset():
box1.right, box1.left = None, None
slider_animate.value = 0
sel_mth_num.value = '0'
sel_mth_oper.value = '=='
def refresh():
eg_arr = anim_df.data['eg'].values
for eg, slider in sl_size_dict.items():
np.put(anim_df.data['s'], np.where(eg_arr == eg)[0], slider.value)
for eg, slider in sl_alpha_dict.items():
np.put(anim_df.data['a'], np.where(eg_arr == eg)[0], slider.value)
# capture the new size and alpha values for the month groupby data
mgrps_gb.update_data(anim_df.data.groupby('mnum'))
def fwd1():
box1.right, box1.left = None, None
new_val = slider_animate.value + 1
if new_val < max_month:
slider_animate.value = new_val
sel_mth_num.value = str(new_val)
def back1():
box1.right, box1.left = None, None
new_val = slider_animate.value - 1
if new_val >= 0:
slider_animate.value = new_val
sel_mth_num.value = str(new_val)
def animate_update():
box1.right, box1.left = None, None
mth = slider_animate.value + 1
if mth > max_month:
mth = 0
slider_animate.value = mth
sel_mth_num.value = str(mth)
# def prepare_animate(attr, old, new):
# pass
# future development...trails
# proposal_save
# grab the widget values, create a dictionary, pickle
def store_vals():
with open('dill/editor_dict.pkl', 'wb') as handle:
pickle.dump(vars(ed),
handle,
protocol=pickle.HIGHEST_PROTOCOL)
def save_edited_df():
store_vals()
calc_ds.data.to_pickle('dill/ds_edit.pkl')
save_edited_order()
def save_edited_order():
reorder_df.data[['new_order']].to_pickle('dill/p_edit.pkl')
def save_order_to_excel():
xl_str = 'excel/' + ed.case + '/proposals.xlsx'
df = reorder_df.data[['new_order']]
df = df.reset_index()[['empkey']]
df.index = df.index + 1
df.index.name = 'order'
ws_dict = pd.read_excel(xl_str, index_col=0, sheet_name=None)
ws_dict['edit'] = df
with pd.ExcelWriter(xl_str, engine='xlsxwriter') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
def base_change(attr, old, new):
ed.sel_base = new
def cond_change(attr, old, new):
ed.sel_cond = new
def find_order():
try: # look for edit list or compare list (determined by sel_proposal)
if ed.sel_proposal == 'edit': # edit order
prop_name = 'edit'
if proposal.list_order is not None:
df_order = proposal.list_order
else:
df_order = pd.read_pickle('dill/p_edit.pkl')
else: # reset to compare order
prop_name = ed.sel_proposal
df_order = pd.read_pickle('dill/p_' + ed.sel_proposal + '.pkl')
except OSError: # above not found, default to first found
df_order, prop_name = use_first_proposal_found('edit')
proposal.update_order(df_order)
proposal.update_name(prop_name)
def proposal_change(attr, old, new):
ed.sel_proposal = new
# set the proposal.list_order
find_order()
# Center Column
def measure_change(attr, old, new):
ed.sel_measure = new
def calc_button():
label.text = ''
calc_note.visible = True
find_order()
calc_dataset()
join_dataset()
update_main_plot()
update_stripplot()
calc_note.visible = False
def plot_button():
label.text = ''
plot_note.visible = True
join_dataset()
update_main_plot()
plot_note.visible = False
def calc_dataset():
# this routine creates a new integrated dataset based on a given
# list order and list of job assignment conditions
# to change calculation order,
# update the proposal.list_order property...
# save the input list order (not every time a squeeze is done) if
# the edit proposal is selected (sel_proposal).
# if the proposal is not edit, the order column is 'idx',
# not 'new_order'.
# This avoids saving a non-edit proposal list as an edited list.
if 'new_order' in proposal.list_order.columns:
proposal.list_order.to_pickle('dill/p_edit.pkl')
# save the widget settings
store_vals()
# calling the main integrated dataset generation routine...
ds = make_dataset(proposal_name=proposal.name,
df_order=proposal.list_order,
conditions=cond_dict[ed.sel_cond],
ds=skel.data,
ds_stand=ds_stand.data)
calc_ds.update_data(ds) # set to instance of Data class
def update_axis_formats():
if len(filt_df.data):
if ed.sel_ytype == 'abs':
if ed.sel_measure in ['cpay', 'mpay', 'ylong', 'mlong',
'age', 'scale', 's_lmonths']:
ed.cht_yflipped = False
else:
ed.cht_yflipped = True
else:
ed.cht_yflipped = False
p1.y_range.update(flipped=ed.cht_yflipped)
if ed.sel_measure in pcnt_cols:
p1.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")
else:
if ed.sel_measure in float_cols:
p1.yaxis[0].formatter = NumeralTickFormatter(format="0.0")
elif ed.sel_measure in date_cols:
p1.yaxis[0].formatter = DatetimeTickFormatter(years=['%Y'])
else:
p1.yaxis[0].formatter = NumeralTickFormatter(format="0")
if ed.sel_xtype in ['pcnt_s', 'pcnt_r']:
p1.xaxis[0].formatter = NumeralTickFormatter(format="0.0%")
if (slider_edit_zone.value[1] > 1 or
slider_edit_zone.value[1] > max(filt_xax.data)):
ed.x_high = .65 * max(filt_xax.data)
ed.x_low = .45 * max(filt_xax.data)
ed.ez_step = .001
else:
p1.xaxis[0].formatter = NumeralTickFormatter(format="0")
if (slider_edit_zone.value[1] <= 1 or
slider_edit_zone.value[1] > max(filt_xax.data)):
ed.x_high = int(.65 * max(filt_xax.data))
ed.x_low = int(.45 * max(filt_xax.data))
ed.ez_step = 1
slider_edit_zone.update(end=max(filt_xax.data),
step=ed.ez_step,
value=(ed.x_low, ed.x_high))
def join_dataset():
label.text = ''
ret_only = 1 in ed.chk_filter
extra_filter = 0 in ed.chk_filter
base_cols = [ed.sel_measure, 'mnum']
calc_ds_cols = [ed.sel_measure, 'mnum', 'new_order', 'eg']
if ret_only:
calc_ds_cols.append('ret_mark')
# if extra filters are to be used, the appropriate columns are
# added to the dataframe
if extra_filter: # this means filter is checked
a1 = ed.sel_filt1
a2 = ed.sel_filt2
a3 = ed.sel_filt3
o1 = ed.sel_oper1
o2 = ed.sel_oper2
o3 = ed.sel_oper3
v1 = ed.txt_input1
v2 = ed.txt_input2
v3 = ed.txt_input3
# filtlist means "filter list"
attr_filtlist = [a1, a2, a3]
oper_filtlist = [o1, o2, o3]
vals_filtlist = [v1, v2, v3]
# add filter columns
filt_cols = []
for i, attr in enumerate(attr_filtlist):
if attr_filtlist[i] and oper_filtlist[i] and vals_filtlist[i]:
filt_cols.append(attr)
filt_cols = list(set(filt_cols))
calc_ds_cols = list(set().union(calc_ds_cols, filt_cols))
if ed.chk_hover_on and ed.chk_hover_sel:
hover_cols.data = []
for key in ed.chk_hover_sel:
col = hdict[key][0]
if col != ed.sel_measure:
hover_cols.data.append(col)
if hover_cols.data:
calc_ds_cols = list(set().union(calc_ds_cols,
hover_cols.data))
# -----------------------------------------------------------------
# BASE DATAFRAME (not filtered)
# assign base_ds - check if stored dataset must be read from disc
# or current base_ds may be used
if ed.sel_base == 'standalone':
base_ds.data = ds_stand.data
else:
if ed.sel_base != ed.base_ds_name:
base_ds.data = pd.read_pickle('dill/ds_' +
ed.sel_base + '.pkl')
ed.base_ds_name = ed.sel_base
df = base_ds.data[base_cols].copy()
df.rename(columns={ed.sel_measure: ed.sel_measure + '_b'},
inplace=True)
# for stripplot and squeeze (month zero):
data_reorder = calc_ds.data[calc_ds.data.mnum == 0][['eg']].copy()
data_reorder['new_order'] = \
np.arange(len(data_reorder)).astype(int) + 1
# set the df attribute of the reorder_df Data object:
reorder_df.update_data(data_reorder)
# index df for range values conversion (integrated ds month zero order)
idx_df.update_data(reorder_df.data[[]].copy())
idx_df.data['orig_order'] = np.arange(len(idx_df.data)) + 1
join_ds = calc_ds.data[calc_ds_cols].copy()
# add mnum to index
df.set_index('mnum', append=True, inplace=True)
join_ds.set_index('mnum', append=True, inplace=True)
join_ds.rename(columns={ed.sel_measure: 'abs', 'new_order': 'prop_s'},
inplace=True)
# JOIN BASE and COMPARE
df = df.join(join_ds)
df.reset_index(level='mnum', inplace=True)
df.sort_values(['mnum', 'prop_s'], inplace=True)
strip_df.update_data(df[df.mnum == 0][['prop_s', 'eg']].copy())
# set up color column - Note rgba values do not work with this
egs = strip_df.data['eg'].values
clr = np.empty(len(strip_df.data), dtype='object')
for eg in eg_list:
np.put(clr, np.where(egs == eg)[0], eg_cdict[eg])
strip_df.data['c'] = clr
strip_df.data['a'] = ed.p2_marker_alpha
strip_df.data['s'] = ed.p2_marker_size
# running (monthly) proposal list ordering
df['prop_r'] = df.groupby('mnum').cumcount() + 1
prop_r = df.prop_r.values
eg_vals = df.eg.values
eg_denom_dict = df.groupby('eg').prop_r.max().to_dict()
denoms = np.zeros(eg_vals.size)
for eg in eg_list:
np.put(denoms, np.where(eg_vals == eg)[0], eg_denom_dict[eg])
df['pcnt_r'] = prop_r / denoms
df['pcnt_s'] = f.make_starting_val_column(df, 'pcnt_r',
inplace=False)
# FILTERING
# ret only filter
if ret_only: # this means ret_only is checked
df = df[eval('(df.ret_mark == 1)')].copy()
if extra_filter:
df, filt_str.data = filter_ds(df,
attr1=a1, oper1=o1, val1=v1,
attr2=a2, oper2=o2, val2=v2,
attr3=a3, oper3=o3, val3=v3)
filt_str.data = ', with filter: [ ' + filt_str.data + ' ]'
else:
filt_str.data = ''
df = add_source_columns(df)
df.sort_values(by='prop_s', inplace=True)
# make dataframe and groupby source for animation
anim_df.data = df.copy()
mgrps_gb.update_data(anim_df.data.groupby('mnum'))
# month filter
mnum_oper = ed.sel_mth_oper
mnum_val = ed.sel_mth_num
mnum_filt_str = ' '.join(['mnum', mnum_oper, mnum_val])
mnum_str = '(df.' + mnum_filt_str + ')'
df_display = df[eval(mnum_str)].copy()
if len(df_display):
filt_df.update_data(df_display)
# make arrays from filt_df
filt_xax.update_data(filt_df.data[ed.sel_xtype].values)
idx_xax.update_data(filt_df.data['prop_s'].values)
alpha_filt_arr.update_data(filt_df.data['a'].values)
eg_filt_arr.update_data(filt_df.data['eg'].values)
zero_filt_arr.update_data(np.full(len(filt_df.data), 0.0))
size_filt_arr.update_data(filt_df.data['s'].values)
slider_edit_zone.update(end=max(filt_xax.data))
else:
# if df_display is empty (through use of extra filters):
label.text = 'NO DATA: mth ' + ed.sel_mth_num
def add_source_columns(df):
# set up color column - Note rgba values do not work with this
egs = df['eg'].values
clr = np.empty(len(df), dtype='object')
alph = np.zeros(len(df))
sze = np.zeros(len(df))
# set colors from eg_cdict and set size and alpha from slider values
for eg in eg_list:
these_idx = np.where(egs == eg)[0]
np.put(clr, these_idx, eg_cdict[eg])
np.put(alph, these_idx, sl_alpha_dict[eg].value)
np.put(sze, these_idx, sl_size_dict[eg].value)
df['c'] = clr
df['a'] = alph
df['s'] = sze
# add "diff" column if selected by sel_ytype dropdown widget input
if ed.sel_ytype == 'diff':
diff_str.data = ' vs ' + ed.sel_base + ' '
if ed.sel_measure not in no_invert:
df['diff'] = df[ed.sel_measure + '_b'] - df['abs']
else:
df['diff'] = df['abs'] - df[ed.sel_measure + '_b']
else:
diff_str.data = ' '
return df
def update_main_plot():
p1.title.text = (proposal.name + diff_str.data +
ed.sel_measure.upper() +
' ' + ed.sel_ytype + ' values' +
filt_str.data)
if 0 in ed.chk_display:
acol = alpha_filt_arr.data
else:
acol = zero_filt_arr.data
source1.data = {'x': filt_df.data[ed.sel_xtype].values,
'y': filt_df.data[ed.sel_ytype].values,
'c': filt_df.data['c'].values,
'a': acol,
's': filt_df.data['s'].values,
'eg': filt_df.data['eg'].values}
if ed.chk_hover_on and ed.chk_hover_sel:
for key in ed.chk_hover_sel:
col = hdict[key][0]
if col != ed.sel_measure:
source1.add(data=filt_df.data[col].values,
name=col)
xl = float(ed.x_low)
xh = float(ed.x_high)
box1.left, box1.right = xh, xl
xl2 = f.cross_val(filt_xax.data, xl, idx_xax.data)
xh2 = f.cross_val(filt_xax.data, xh, idx_xax.data)
box2.left, box2.right = xh2, xl2
clear_line_data()
update_axis_formats()
update_line_data()
def make_plots(return_plots=False):
p1.plot_width = plot_width # ed.cht_xsize
p1.plot_height = plot_height # ed.cht_ysize
p1.y_range = DataRange1d(range_padding=0.0)
p1.x_range = DataRange1d(end=0.0, flipped=True, range_padding=0.0)
p1.title.text = (proposal.name + diff_str.data +
ed.sel_measure.upper() +
' ' + ed.sel_ytype + ' values' +
filt_str.data)
p1.background_fill_color = ed.sel_bgc
p1.background_fill_alpha = float(ed.sel_bgc_alpha)
p1.add_tools(crosshair_tool.data)
p1.add_tools(hover_tool.data)
p1.toolbar.active_inspect = [hover_tool.data]
# p1.output_backend = 'webgl'
box1.left, box1.right = ed.x_high, ed.x_low
p2.background_fill_color = ed.sel_bgc
p2.background_fill_alpha = float(ed.sel_bgc_alpha)
# p2.output_backend = 'webgl'
box2.left, box2.right = ed.x_high, ed.x_low
# source1 dictionary assignment
src1_dict = {'x': filt_df.data[ed.sel_xtype],
'y': filt_df.data[ed.sel_ytype],
'c': filt_df.data['c'],
'a': filt_df.data['a'],
's': filt_df.data['s'],
'eg': filt_df.data['eg']}
if ed.chk_hover_on and ed.chk_hover_sel:
hover_dict = {}
for idx in ed.chk_hover_sel:
col = hdict[idx][0]
if col != ed.sel_measure:
hover_dict[col] = filt_df.data[col].values
src1_dict.update(hover_dict)
source1.update(data=src1_dict)
# source2 dictionary assignment
src2_dict = {'x': strip_df.data['prop_s'],
# 'y': df_display[yval],
'c': strip_df.data['c'],
'a': strip_df.data['a'],
's': strip_df.data['s'],
'eg': strip_df.data['eg']}
# set ColumnDataSource data
source1.data = src1_dict
source2.data = src2_dict
# ------------------------------------------------------------------
p1.grid.grid_line_color = ed.sel_gridc
p1.grid.grid_line_alpha = float(ed.sel_gridc_alpha)
p1.toolbar.logo = None
p1.grid.minor_grid_line_color = ed.sel_gridc
p1.grid.minor_grid_line_alpha = ed.minor_grid_alpha
p1.grid.minor_grid_line_dash = 'dotted'
p1.circle('x', 'y', color='c', size='s',
alpha='a',
line_color=marker_edge_color,
line_width=marker_edge_width,
source=source1)
p2.circle(x='x',
y=jitter('eg', width=0.92, distribution="uniform"),
color='c',
size='s',
alpha='a',
line_color=None,
source=source2)
p2.yaxis[0].ticker.desired_num_ticks = len(eg_list)
p2.yaxis.minor_tick_line_color = None
p2.ygrid.grid_line_color = None
p2.xgrid.grid_line_color = ed.sel_gridc
p2.xgrid.grid_line_alpha = float(sel_gridc_alpha.value)
p2.toolbar.logo = None
p1.add_layout(box1)
p2.add_layout(box2)
# p2.add_glyph(quad2_source, quad2)
add_line_glyphs(eg_list)
update_line_data()
update_axis_formats()
# zeroline
zeroline = Span(location=0, dimension='width',
line_dash='dashed',
line_color='red', line_width=1)
p1.add_layout(zeroline)
p1.add_layout(label)
if return_plots:
return p1, p2
# display tab
def filter_change(attr, old, new):
ed.chk_filter = list(chk_filter.active)
if 1 not in ed.chk_filter:
sel_mth_oper.value = '=='
def add_line_glyphs(eg_list):
for eg in eg_list:
p1.add_glyph(src_dict.kdict['sp' + str(eg)],
glyph=polys.kdict['p' + str(eg)])
p1.add_glyph(src_dict.kdict['sm' + str(eg)],
glyph=means.kdict['m' + str(eg)])
p1.add_glyph(src_dict.kdict['ss' + str(eg)],
glyph=savgols.kdict['s' + str(eg)])
def update_line_data():
chkd = set.intersection(set([1, 2, 3]), set(ed.chk_display))
# scatter markers
if 0 in ed.chk_display:
source1.data.update(a=alpha_filt_arr.data)
else:
source1.data.update(a=zero_filt_arr.data)
if chkd:
for eg in pd.unique(filt_df.data['eg']):
eg_df = filt_df.data[filt_df.data['eg'] == eg].copy()
xlvals = eg_df[ed.sel_xtype].values
ylvals = eg_df[ed.sel_ytype].values
idx = np.isfinite(xlvals) & np.isfinite(ylvals)
xlvals = xlvals[idx]
ylvals = ylvals[idx]
# poly_fit
if 1 in chkd:
pdata = poly.fit(xlvals, ylvals, poly_dim).linspace()
src_dict.kdict['sp' + str(eg)].data = \
dict(x=list(pdata[0]), y=list(pdata[1]))
# mean
if 2 in chkd:
yma = ema(ylvals, ema_len)
src_dict.kdict['sm' + str(eg)].data = dict(x=xlvals,
y=yma)
# Savitzky–Golay filter
if 3 in chkd:
sf_data = sf(ylvals, savgol_window, savgol_fit)
sf_data[sf_data == np.nan] = 0
src_dict.kdict['ss' + str(eg)].data.update(x=xlvals,
y=sf_data)
def clear_line_data():
for eg in eg_list:
# poly_fit
src_dict.kdict['sp' + str(eg)].data.update(**nan_dict)
# mean
src_dict.kdict['sm' + str(eg)].data.update(**nan_dict)
# savgol
src_dict.kdict['ss' + str(eg)].data.update(**nan_dict)
def ema(arr, n):
"""
compute an n period exponential moving average.
"""
x = np.asarray(arr)
weights = np.exp(np.linspace(-1., 0., n))
weights /= weights.sum()
a = np.convolve(x, weights, mode='full')[:len(x)]
a[:n] = a[n]
return a
def display_change(attr, old, new):
ed.chk_display = list(chk_display.active)
clear_line_data()
update_line_data()
def month_oper_change(attr, old, new):
ed.sel_mth_oper = new
def month_num_change(attr, old, new):
ed.sel_mth_num = new
def ytype_change(attr, old, new):
ed.sel_ytype = new
def xtype_change(attr, old, new):
ed.sel_xtype = new
# size_alpha
def reset_sliders():
for s_slider in sl_size_dict.values():
s_slider.value = start_dot_size
for a_slider in sl_alpha_dict.values():
a_slider.value = start_marker_alpha
def slider_big():
for s_slider in sl_size_dict.values():
if s_slider.value < max_dot_size:
s_slider.value += size_step
def slider_sml():
for s_slider in sl_size_dict.values():
if s_slider.value >= size_step:
s_slider.value -= size_step
def slider_aup():
for a_slider in sl_alpha_dict.values():
if a_slider.value <= 1 - alpha_step:
a_slider.value += alpha_step
def slider_adn():
for slider in sl_alpha_dict.values():
if slider.value > 0:
slider.value -= alpha_step
# size_alpha source
def update_scat_size_p1(attr, old, new, eg):
s = sl_size_dict[eg].value
s_arr = np.array(source1.data['s'])
eg_arr = np.array(source1.data['eg'])
np.put(s_arr, np.where(eg_arr == eg)[0], s)
source1.data.update({'s': s_arr})
def update_scat_alpha_p1(attr, old, new, eg):
a = sl_alpha_dict[eg].value
a_arr = np.array(source1.data['a'])
eg_arr = np.array(source1.data['eg'])
np.put(a_arr, np.where(eg_arr == eg)[0], a)
source1.data.update({'a': a_arr})
# grid_bg
def update_bg_color(attr, old, new):
float_alpha = float(sel_bgc_alpha.value)
if 0 in ed.chk_color_apply:
p1.background_fill_color = sel_bgc.value
p1.background_fill_alpha = float_alpha
p2.background_fill_color = sel_bgc.value
p2.background_fill_alpha = float_alpha
ed.sel_bgc = sel_bgc.value
ed.sel_bgc_alpha = sel_bgc_alpha.value
if 1 in ed.chk_color_apply:
box1.fill_color = sel_bgc.value
box1.fill_alpha = float_alpha
box2.fill_color = sel_bgc.value
box2.fill_alpha = float_alpha
ed.box_fill_color = sel_bgc.value
ed.box_fill_alpha = float_alpha
def update_grid_color(attr, old, new):
float_alpha = float(sel_gridc_alpha.value)
if 0 in ed.chk_color_apply:
p1.grid.grid_line_color = sel_gridc.value
p1.grid.grid_line_alpha = float_alpha
p2.xgrid.grid_line_color = sel_gridc.value
p2.xgrid.grid_line_alpha = float_alpha
ed.sel_gridc = sel_gridc.value
ed.sel_gridc_alpha = sel_gridc_alpha.value
if 1 in ed.chk_color_apply:
box1.line_color = sel_gridc.value
box1.line_alpha = float_alpha
box2.line_color = sel_gridc.value
box2.line_alpha = float_alpha
ed.box_line_color = sel_gridc.value
ed.box_line_alpha = float_alpha
def reset_colors():
temp_chk_color_apply = ed.chk_color_apply
ed.chk_color_apply = [0, 1]
sel_bgc.value = 'White'
sel_bgc_alpha.value = '.10'
sel_gridc.value = 'Gray'
sel_gridc_alpha.value = '.20'
ed.sel_bgc = 'White'
ed.sel_bgc_alpha = '.10'
ed.sel_gridc = 'Gray'
ed.sel_gridc_alpha = '.20'
if chk_minor_grid.active:
p1.grid.minor_grid_line_color = 'Gray'
p1.grid.minor_grid_line_alpha = .20
else:
p1.grid.minor_grid_line_alpha = 0.0
sel_box_line_width.value = '1.0'
box1.line_color = 'black'
box1.line_alpha = .8
box2.line_color = 'black'
box2.line_alpha = .8
box1.fill_color = 'black'
box1.fill_alpha = .05
box2.fill_color = 'black'
box2.fill_alpha = .05
ed.chk_color_apply = temp_chk_color_apply
def minor_grid(attr, old, new):
if chk_minor_grid.active:
p1.grid.minor_grid_line_color = ed.sel_gridc
p1.grid.minor_grid_line_alpha = float(ed.sel_gridc_alpha)
else:
p1.grid.minor_grid_line_alpha = 0.0
ed.chk_minor_grid = list(chk_minor_grid.active)
def color_apply(attr, old, new):
ed.chk_color_apply = list(chk_color_apply.active)
def edit_line_width(attr, old, new):
ed.box_line_width = sel_box_line_width.value
box1.line_width = float(new)
box2.line_width = float(new)
# hover
def hover_tool_control(attr, old, new):
ed.chk_hover_sel = list(chk_hover_sel.active)
ed.chk_hover_on = list(chk_hover_on.active)
manage_hover_tool()
# make html for tooltip formatting
def manage_hover_tool():
if ed.chk_hover_on and ed.chk_hover_sel:
pre_div = ('<div style="background-color:' +
'rgba(0, 0, 0, 0.03);' +
'overflow: auto;">')
mid_div = ''
suf_div = '</div>'
for key in ed.chk_hover_sel:
col = hdict[key][0]
if col != ed.sel_measure:
mid_div += html_str % (col, ' ' + hdict[key][1])
hover_tool.data.tooltips = pre_div + mid_div + suf_div
else:
hover_tool.data.tooltips = None
tool_tips.data = None
hover_cols.data = []
# density (jitter stripplot)
def update_stripplot():
source2.data = dict(a=[], c=[], eg=[], s=[], x=[])
source2.data = dict(a=strip_df.data['a'],
c=strip_df.data['c'],
eg=strip_df.data['eg'],
s=strip_df.data['s'],
x=strip_df.data['prop_s'])
def update_scat_size_p2(attr, old, new):
size_arr = np.full(num_dots, new)
source2.data.update({'s': size_arr})
ed.p2_marker_size = new
def update_scat_alpha_p2(attr, old, new):
size_arr = np.full(num_dots, new)
source2.data.update({'a': size_arr})
ed.p2_marker_alpha = new
# edit range
def update_edit_range(attr, old, new):
# Get slider values
xl = slider_edit_zone.value[0]
xh = slider_edit_zone.value[1]
box1.left, box1.right = xh, xl
xl2 = f.cross_val(filt_xax.data, xl, idx_xax.data)
xh2 = f.cross_val(filt_xax.data, xh, idx_xax.data)
box2.left, box2.right = xl2, xh2
# update editor dict namespace
ed.x_low = xl
ed.x_high = xh
# -----END Callback functions-------------------------------
# -----START Callback actions-------------------------------
# squeeze
sel_sqz_type.on_change('value', sqz_type_change)
sel_sqz_dir.on_change('value', sqz_dir_change)
sel_emp_grp.on_change('value', emp_group_change)
slider_squeeze.on_change('value', update_squeeze)
but_squeeze.on_click(perform_squeeze)
but_0add.on_click(line0_add)
but_0sub.on_click(line0_sub)
but_1add.on_click(line1_add)
but_1sub.on_click(line1_sub)
# extra filters
sel_filt1.on_change('value', update_sel_filt1)
sel_filt2.on_change('value', update_sel_filt2)
sel_filt3.on_change('value', update_sel_filt3)
sel_oper1.on_change('value', update_oper1)
sel_oper2.on_change('value', update_oper2)
sel_oper3.on_change('value', update_oper3)
txt_input1.on_change('value', update_txt_input1)
txt_input2.on_change('value', update_txt_input2)
txt_input3.on_change('value', update_txt_input3)
# animate:
but_play.on_click(animate)
but_reset.on_click(reset)
but_refresh.on_click(refresh)
but_back.on_click(back1)
but_fwd.on_click(fwd1)
slider_animate.on_change('value', animate_source)
# commented for future development...
# chk_trails.on_change('active', prepare_animate)
# proposal_save
but_save_edit.on_click(save_edited_df)
but_save_order.on_click(save_order_to_excel)
sel_base.on_change('value', base_change)
sel_cond.on_change('value', cond_change)
sel_proposal.on_change('value', proposal_change)
# center column
sel_measure.on_change('value', measure_change)
but_plot.on_click(plot_button)
but_calc.on_click(calc_button)
# display
chk_filter.on_change('active', filter_change)
chk_display.on_change('active', display_change)
sel_mth_oper.on_change('value', month_oper_change)
sel_mth_num.on_change('value', month_num_change)
sel_ytype.on_change('value', ytype_change)
sel_xtype.on_change('value', xtype_change)
# size_alpha
for eg, slider in sl_size_dict.items():
slider.on_change('value', partial(update_scat_size_p1, eg=eg))
for eg, slider in sl_alpha_dict.items():
slider.on_change('value', partial(update_scat_alpha_p1, eg=eg))
but_slider_reset.on_click(reset_sliders)
but_slider_big.on_click(slider_big)
but_slider_sml.on_click(slider_sml)
but_slider_aup.on_click(slider_aup)
but_slider_adn.on_click(slider_adn)
# grid_bg
sel_bgc.on_change('value', update_bg_color)
sel_bgc_alpha.on_change('value', update_bg_color)
sel_gridc.on_change('value', update_grid_color)
sel_gridc_alpha.on_change('value', update_grid_color)
but_reset_colors.on_click(reset_colors)
chk_minor_grid.on_change('active', minor_grid)
chk_color_apply.on_change('active', color_apply)
sel_box_line_width.on_change('value', edit_line_width)
# hover
chk_hover_on.on_change('active', hover_tool_control)
chk_hover_sel.on_change('active', hover_tool_control)
# density (stripplot):
slider_strip_size.on_change('value', update_scat_size_p2)
slider_strip_alpha.on_change('value', update_scat_alpha_p2)
# edit range slider
slider_edit_zone.on_change('value', update_edit_range)
# ------END Callback Actions-------------------------------------
# ------START Initial Computations-------------------------------
# Read skeleton dataset
try:
skel.data = pd.read_pickle('dill/skeleton.pkl')
except OSError:
# exit routine if baseline dataset not found
print('skeleton.pkl not found, run make_skeleton.py?\n')
print('\n >>> exiting routine.\n')
sys.exit()
# Read standalone dataset/assign baseline dataset
try:
ds_stand.data = pd.read_pickle('dill/standalone.pkl')
if ed.sel_base == 'standalone':
base_ds.data = ds_stand.data.copy()
else:
# set BASELINE dataset if something other than standalone
try:
base_ds.data = pd.read_pickle('dill/ds_' +
ed.sel_base + '.pkl')
except OSError:
base_ds.data = ds_stand.data.copy()
print('invalid "base_ds" name input?\n' +
'standalone set as base\n')
except OSError:
# exit routine if baseline dataset not found
print('standalone.pkl or selected baseline dataset not found...\n' +
'run standalone.py?\n')
print('\n >>> exiting routine.\n')
sys.exit()
# initial order and dataset generation
find_order()
calc_dataset()
join_dataset()
p1, p2 = make_plots(return_plots=True)
manage_hover_tool()
# --------END Initial Computations-------------------------------
# --------START Widget Layout------------------------------------
# PANEL1
# squeeze tab items
squeeze_widgets = column(row(sel_sqz_type,
spacer_sqz_but2,
sel_emp_grp,
spacer_sqz_but3,
sel_sqz_dir),
row(slider_squeeze),
row(but_1add, spacer_toggle_1, but_1sub,
spacer_toggle_center1,
but_squeeze,
spacer_toggle_center2,
but_0add, spacer_toggle_2, but_0sub))
# extra filters tab items
filter_widgets = row(column(sel_filt1, sel_filt2, sel_filt3),
column(sel_oper1, sel_oper2, sel_oper3),
column(txt_input1,
txt_input2,
txt_input3))
# animate tab items
anim_row1 = row(but_play, spacer_anim1, but_reset)
anim_row2 = row(slider_animate)
anim_row3 = row(but_back, spacer_anim2, but_fwd,
spacer_anim_refresh, but_refresh)
anim_col1 = column(anim_row1, anim_row2, anim_row3)
# the commented items below are on hold for future development...
# anim_col2 = column(chk_trails, sel_trails)
# anim_items = row(anim_col1, spacer_anim, anim_col2)
anim_items = row(anim_col1)
# proposal_save
save_buttons = column(spacer_top_save,
row(but_save_edit),
row(but_save_order))
save_dropdowns = column(sel_base, sel_cond, sel_proposal)
save_widgets = row(save_buttons, spacer_middle_save, save_dropdowns)
# make panels for main tab group
panel1_tab1 = Panel(child=squeeze_widgets, title='squeeze')
panel1_tab2 = Panel(child=filter_widgets, title='extra filters')
panel1_tab3 = Panel(child=anim_items, title='animate')
panel1_tab4 = Panel(child=save_widgets, title='proposal_save')
# combine main panels into panel1 tab object
panel1 = Tabs(tabs=[panel1_tab1, panel1_tab2,
panel1_tab3, panel1_tab4], width=panel1_width,
height=controls_height)
# CENTER COLUMN
buttons_and_attr_sel = column(spacer_top_center_col,
sel_measure,
but_plot,
but_calc,
height=controls_height,
width=sel_width)
# PANEL2
# display
but_row1 = row(spacer_disp_mth1, sel_mth_oper,
spacer_disp_mth2, sel_mth_num)
but_row2 = row(spacer_disp_ax1, sel_ytype,
spacer_disp_ax2, sel_xtype)
but_col = column(spacer_top_disp, but_row1, but_row2)
chk_col = column(chk_filter, chk_display)
# display tab items
display_widgets = row(chk_col, but_col)
# size_alpha tab items
szal_sliders = row(slider_list)
sz_buttons = row(but_slider_sml, spacer_size_buts, but_slider_big)
al_buttons = row(but_slider_adn, spacer_alpha_buts, but_slider_aup)
szal_but_col = column(spacer_top_size_alpha, but_slider_reset,
sz_buttons, al_buttons,
width=120)
szal_items = row(szal_sliders, szal_but_col)
# grid_bg tab items
gbg_col1 = column(sel_bgc, sel_gridc,
height=chart_sel_height)
gbg_col2 = column(sel_bgc_alpha, sel_gridc_alpha,
height=chart_sel_height)
gbg_col12 = row(gbg_col1, spacer_linesbg_col,
gbg_col2, spacer_linesbg_col2)
gbg_bottom_row = row(but_reset_colors,
spacer_linesbg_bottom,
chk_minor_grid, width=200)
gbg_left = column(gbg_col12, gbg_bottom_row, width=300)
gbg_col3 = column(spacer_top_color_apply, chk_color_apply,
sel_box_line_width)
gbg_items = row(gbg_left, gbg_col3)
# hover tab items
hover_row = row(chk_hover_on, chk_hover_sel)
# make panels for aux tab group
panel2_tab1 = Panel(child=display_widgets, title='display')
panel2_tab2 = Panel(child=szal_items, title='size_alpha')
panel2_tab3 = Panel(child=gbg_items, title='grid_bg')
panel2_tab4 = Panel(child=hover_row, title='hover')
panel2_tab5 = Panel(child=column(slider_strip_size, slider_strip_alpha),
title='density')
# combine aux panels into panel2 tab object
panel2 = Tabs(tabs=[panel2_tab1, panel2_tab2, panel2_tab3,
panel2_tab4, panel2_tab5],
height=controls_height, width=panel2_width)
# --------END Widget Layout--------------------------------------
# --------START Main Layout--------------------------------------
p1_row = row(p1)
p2_row = row(p2)
p1.add_layout(calc_note)
p1.add_layout(plot_note)
l_o = layout(row(panel1,
spacer_controls1,
buttons_and_attr_sel,
spacer_controls2,
panel2),
row(spacer_edit, slider_edit_zone),
p1_row,
p2_row)
doc.add_root(l_o)
return doc
# --------END Main Layout----------------------------------------
[docs]def color_list():
'''provides a list of string color names for editor grid_bg tab
color selectors
'''
colors = ['AliceBlue', 'AntiqueWhite', 'Aqua', 'Aquamarine',
'Azure', 'Beige', 'Bisque', 'Black', 'BlanchedAlmond',
'Blue', 'BlueViolet', 'Brown', 'BurlyWood', 'CadetBlue',
'Chartreuse', 'Chocolate', 'Coral', 'CornflowerBlue',
'Cornsilk', 'Crimson', 'Cyan', 'DarkBlue', 'DarkCyan',
'DarkGoldenRod', 'DarkGray', 'DarkGrey', 'DarkGreen',
'DarkKhaki', 'DarkMagenta', 'DarkOliveGreen', 'Darkorange',
'DarkOrchid', 'DarkRed', 'DarkSalmon', 'DarkSeaGreen',
'DarkSlateBlue', 'DarkSlateGray', 'DarkSlateGrey',
'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue',
'DimGray', 'DimGrey', 'DodgerBlue', 'FireBrick',
'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro',
'GhostWhite', 'Gold', 'GoldenRod', 'Gray', 'Grey',
'Green', 'GreenYellow', 'HoneyDew', 'HotPink', 'IndianRed',
'Indigo', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush',
'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral',
'LightCyan', 'LightGoldenRodYellow', 'LightGray',
'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon',
'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray',
'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
'LimeGreen', 'Linen', 'Magenta', 'Maroon',
'MediumAquaMarine', 'MediumBlue', 'MediumOrchid',
'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue',
'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed',
'MidnightBlue', 'MintCream', 'MistyRose', 'Moccasin',
'NavajoWhite', 'Navy', 'OldLace', 'Olive', 'OliveDrab',
'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod',
'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip',
'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Salmon',
'SandyBrown', 'SeaGreen', 'SeaShell', 'Sienna', 'Silver',
'SkyBlue', 'SlateBlue', 'SlateGray', 'SlateGrey', 'Snow',
'SpringGreen', 'SteelBlue', 'Tan', 'Teal', 'Thistle',
'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
'WhiteSmoke', 'Yellow', 'YellowGreen']
return colors
[docs]def alpha_list():
'''provides a list of string decimals for editor grid_bg tab
alpha selectors
'''
alphas = ['.00', '.01', '.02', '.03', '.04', '.05', '.06', '.07',
'.08', '.09', '.10', '.11', '.12', '.13', '.14', '.15',
'.16', '.17', '.18', '.19', '.20', '.21', '.22', '.23',
'.24', '.25', '.26', '.27', '.28', '.29', '.30', '.31',
'.32', '.33', '.34', '.35', '.36', '.37', '.38', '.39',
'.40', '.41', '.42', '.43', '.44', '.45', '.46', '.47',
'.48', '.49', '.50', '.51', '.52', '.53', '.54', '.55',
'.56', '.57', '.58', '.59', '.60', '.61', '.62', '.63',
'.64', '.66', '.68', '.70', '.72', '.75', '.77', '.80',
'.82', '.85', '.87', '.90', '.92', '.95', '.97', '1.0']
return alphas
[docs]def line_widths():
'''provides a list of string decimals for editor grid_bg tab
edit line width selector
'''
widths = ['0.1', '0.2', '0.3', '0.4', '0.5',
'0.6', '0.7', '0.8', '0.9', '1.0',
'1.1', '1.2', '1.3', '1.4', '1.5',
'1.6', '1.7', '1.8', '1.9', '2.0']
return widths
[docs]def use_first_proposal_found(proposal_name):
'''find and return the first list order found in 'dill/proposal_names.pkl'.
This function is used when another proposal name is designated by another
section of the program but does not exist.
inputs
proposal_name (string)
the name of the proposal which was not found
'''
try:
prop_names = \
pd.read_pickle('dill/proposal_names.pkl').proposals.tolist()
this_prop_name = prop_names[0]
stored_case = pd.read_pickle('dill/case_dill.pkl').case.value
print('\nerror : proposal name "' +
str(proposal_name) + '" not found...\n')
print('available proposal names are ', prop_names,
'for case study:',
stored_case)
print('< using ' + this_prop_name + '>')
return pd.read_pickle('dill/p_' + this_prop_name + '.pkl'), \
this_prop_name
except OSError:
print('dill/proposal_names.pkl' + ' or ' +
'dill/case_dill.pkl' + ' not found')
print('\n >>> exiting routine.\n')
sys.exit()
[docs]def make_dataset(proposal_name='',
df_order=None, # list order
conditions=[],
ds=None, # skeleton input
ds_stand=None): # used to calculate pre-implementation data
pre, suf = 'dill/', '.pkl'
order_name = 'p_' + proposal_name
# dataset_name = 'ds_' + proposal_name
order_file = (pre + order_name + suf)
sdict = pd.read_pickle('dill/dict_settings.pkl')
tdict = pd.read_pickle('dill/dict_job_tables.pkl')
num_of_job_levels = sdict['num_of_job_levels']
lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']
try:
df_master = pd.read_pickle(pre + 'master' + suf)
except OSError:
print('Master list not found. Run build_program_files script?')
sys.exit()
# do not include inactive employees (other than furlough) in data model
df_master = df_master[
(df_master.line == 1) | (df_master.fur == 1)].copy()
# ORDER the skeleton df according to INTEGRATED list order.
# df_skel can initially be in any integrated order, each employee
# group must be in proper order relative to itself.
# Use the short-form 'idx' (order) column from either the proposed
# list or the new_order column from an edited list to create a new column,
# 'new_order', within the long-form df_skel. The new order column
# is created by data alignment using the common empkey indexes.
# The skeleton may then be sorted by month and new_order.
# (note: duplicate df_skel empkey index empkeys (from different months)
# are assigned the same order value)
if proposal_name == 'edit':
df_new_order = pd.read_pickle(order_file)
# if 'idx' in df_new_order.columns:
# df_new_order.rename(columns={'idx': 'new_order'}, inplace=True)
ds['new_order'] = df_new_order['new_order']
# dataset_file = (pre + 'ds_edit' + suf)
else:
ds_index = ds[ds.mnum == 0].index.values
df_order_index = df_order.index.values
# mask will remove any inactive employees existing
# within the list df_order proposal
mask = np.isin(df_order_index, ds_index)
df_order = df_order[mask].copy()
df_order_vals = df_order['idx'].values
# assign back to df_order column to permit index data alignment...
df_order['idx'] = st.rankdata(df_order_vals).astype(int)
ds['new_order'] = df_order['idx']
# dataset_file = (pre + dataset_name + suf)
# sort the skeleton by month and proposed list order
ds.sort_values(['mnum', 'new_order'], inplace=True)
# ORIG_JOB*
eg_sequence = df_master.eg.values
fur_sequence = df_master.fur.values
# create list of employee group codes from the master data
egs = sorted(pd.unique(eg_sequence))
# retrieve job counts array
jcnts_arr = tdict['jcnts_arr']
if 'prex' in conditions:
sg_rights = sdict['sg_rights']
sg_eg_list = []
sg_dict = od()
stove_dict = od()
# Find the employee groups which have pre-existing job rights...
# grab the eg code from each sg (special group) job right description
# and add to sg_eg_list
for line_item in sg_rights:
sg_eg_list.append(line_item[0])
# place unique eg codes into sorted list
sg_eg_list = sorted(pd.unique(sg_eg_list))
# Make a dictionary containing the special group data for each
# group with special rights
for eg in sg_eg_list:
sg_data = []
for line_item in sg_rights:
if line_item[0] == eg:
sg_data.append(line_item)
sg_dict[eg] = sg_data
for eg in egs:
if eg in sg_eg_list:
# (run prex stovepipe routine with eg dict key and value)
sg = df_master[df_master.eg == eg]['sg'].values
fur = df_master[df_master.eg == eg]['fur']
ojob_array = f.make_stovepipe_prex_shortform(
jcnts_arr[0][eg - 1], sg, sg_dict[eg], fur)
prex_stove = np.take(ojob_array, np.where(fur == 0)[0])
stove_dict[eg] = prex_stove
else:
# (run make_stovepipe routine with eg dict key and value)
stove_dict[eg] = f.make_stovepipe_jobs_from_jobs_arr(
jcnts_arr[0][eg - 1])
# use dict values as inputs to sp_arr,
# ordered dict maintains proper sequence...
sp_arr = list(np.array(list(stove_dict.values())))
# total of jobs per eg
eg_job_counts = np.add.reduce(jcnts_arr[0], axis=1)
orig_jobs = f.make_intgrtd_from_sep_stove_lists(sp_arr,
eg_sequence,
fur_sequence,
eg_job_counts,
num_of_job_levels)
else:
orig_jobs = f.make_original_jobs_from_counts(
jcnts_arr[0], eg_sequence,
fur_sequence, num_of_job_levels).astype(int)
# insert stovepipe job result into new column of proposal (month_form)
# this indexes the jobs with empkeys (orig_jobs is an ndarray only)
df_master['orig_job'] = orig_jobs
# ASSIGN JOBS - flush and no flush option*
# cmonths - career length in months for each employee.
# length is equal to number of employees
cmonths = f.career_months(df_master, sdict['starting_date'])
# nonret_each_month: count of non-retired employees remaining
# in each month until no more remain -
# length is equal to longest career length
nonret_each_month = f.count_per_month(cmonths)
all_months = np.sum(nonret_each_month)
high_limits = nonret_each_month.cumsum()
low_limits = f.make_lower_slice_limits(high_limits)
# job_level_counts = np.array(jcnts_arr[1])
if sdict['delayed_implementation']:
imp_month = sdict['imp_month']
imp_low = low_limits[imp_month]
imp_high = high_limits[imp_month]
# # read the standalone dataset (info is not in integrated order)
# ds_stand = pd.read_pickle(stand_path_string)
# get standalone data and order it the same as the integrated dataset.
# create a unique key column in the standalone data df and a temporary
# df which is ordered according to the integrated dataset
imp_cols, arr_dict, col_array = \
f.make_preimp_array(ds_stand, ds,
imp_high, sdict['compute_job_category_order'],
sdict['compute_pay_measures'])
# select columns to use as pre-implementation data for integrated
# dataset data is limited to the pre-implementation months
# aligned_jnums and aligned_fur arrays are the same as standalone data
# up to the end of the implementation month, then the standalone value
# for the implementation month is passed down unchanged for the
# remainder of months in the model. These arrays carry over
# standalone data for each employee group to be honored until and when
# the integrated list is implemented.
# These values from the standalone datasets (furlough status and
# standalone job held at the implementation date) are needed for
# subsequent integrated dataset job assignment calculations. Other
# standalone values are simply copied and inserted into the
# pre-implementation months of the integrated dataset.
delayed_jnums = col_array[arr_dict['jnum']]
delayed_fur = col_array[arr_dict['fur']]
aligned_jnums = f.align_fill_down(imp_low,
imp_high,
ds[[]], # indexed with empkeys
delayed_jnums)
aligned_fur = f.align_fill_down(imp_low,
imp_high,
ds[[]],
delayed_fur)
# now assign "filled-down" job numbers to numpy array
delayed_jnums[imp_low:] = aligned_jnums[imp_low:]
delayed_fur[imp_low:] = aligned_fur[imp_low:]
# ORIG_JOB and FUR (delayed implementation)
# then assign numpy array values to orig_job column of integrated
# dataset as starting point for integrated job assignments
ds['orig_job'] = delayed_jnums
ds['fur'] = delayed_fur
if sdict['integrated_counts_preimp']:
# assign combined job counts prior to the implementation date.
# (otherwise, separate employee group counts will be used when
# data is transferred from col_array at end of script)
# NOTE: this data is the actual number of jobs held within each
# category; could be less than the number of jobs available as
# attrition occurs
standalone_preimp_job_counts = \
f.make_delayed_job_counts(imp_month,
delayed_jnums,
low_limits,
high_limits)
col_array[arr_dict['job_count']][:imp_high] = \
standalone_preimp_job_counts
else:
# set implementation month at zero for job assignment routine
imp_month = 0
# ORIG_JOB and FUR (no delayed implementation)
# transfer proposal stovepipe jobs (month_form) to long_form via index
# (empkey) alignment...
ds['orig_job'] = df_master['orig_job']
# developer note: test to verify this is not instantiated elsewhere...
ds['fur'] = df_master['fur']
table = tdict['table']
j_changes = tdict['j_changes']
reduction_months = f.get_job_reduction_months(j_changes)
# copy selected columns from ds for job assignment function input below.
# note: if delayed implementation, the 'fur' and 'orig_job' columns
# contain standalone data through the implementation month.
df_align = ds[['eg', 'sg', 'fur', 'orig_job']].copy()
# JNUM, FUR, JOB_COUNT
if sdict['no_bump']:
# No bump, no flush option (includes conditions, furlough/recall,
# job changes schedules)
# this is the main job assignment function. It loops through all of
# the months in the model and assigns jobs
nbnf, job_count, fur = \
f.assign_jobs_nbnf_job_changes(df_align,
low_limits,
high_limits,
all_months,
reduction_months,
imp_month,
conditions,
sdict,
tdict,
fur_return=sdict['recall'])
ds['jnum'] = nbnf
ds['job_count'] = job_count
ds['fur'] = fur
# for create_snum_and_spcnt_arrays function input...
jnum_jobs = nbnf
else:
# Full flush and bump option (no conditions or
# furlough/recall schedulue considered, job changes are included)
# No bump, no flush applied up to implementation date
fbff, job_count, fur = f.assign_jobs_full_flush_job_changes(
nonret_each_month, table[0], num_of_job_levels)
ds['jnum'] = fbff
ds['job_count'] = job_count
ds['fur'] = fur
# for create_snum_and_spcnt_arrays function input...
jnum_jobs = fbff
# SNUM, SPCNT, LNUM, LSPCNT
monthly_job_counts = table[1]
ds['snum'], ds['spcnt'], ds['lnum'], ds['lspcnt'] = \
f.create_snum_and_spcnt_arrays(jnum_jobs, num_of_job_levels,
nonret_each_month,
monthly_job_counts,
lspcnt_calc)
# RANK in JOB
ds['rank_in_job'] = ds.groupby(['mnum', 'jnum'],
sort=False).cumcount() + 1
# JOBP
jpcnt = (ds.rank_in_job / ds.job_count).values
np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)
ds['jobp'] = ds['jnum'] + jpcnt
# PAY - merge with pay table - provides monthly pay
if sdict['compute_pay_measures']:
# account for furlough time (only count active months)
if sdict['discount_longev_for_fur']:
# skel(ds) provides pre-calculated non-discounted scale data
# flip ones and zeros...
ds['non_fur'] = 1 - ds.fur.values
non_fur = ds.groupby([pd.Grouper('empkey')])['non_fur'] \
.cumsum().values
ds.pop('non_fur')
starting_mlong = ds.s_lmonths.values
cum_active_months = non_fur + starting_mlong
ds['mlong'] = cum_active_months
ds['ylong'] = ds['mlong'].values / 12
ds['scale'] = np.clip((cum_active_months / 12) + 1, 1,
sdict['top_of_scale']).astype(int)
# make a new long_form dataframe and assign a combination of
# pay-related ds columns from large dataset as its index...
# the dataframe is empty - we are only making an index-alignment
# vehicle to use with indexed pay table....
# the dataframe index contains specific scale, job, and contract year
# for each line in long_form ds
df_pt_index = pd.DataFrame(index=((ds['scale'].values * 100) +
ds['jnum'].values +
(ds['year'].values * 100000)))
if sdict['enhanced_jobs']:
df_pt = pd.read_pickle('dill/pay_table_enhanced.pkl')
else:
df_pt = pd.read_pickle('dill/pay_table_basic.pkl')
# 'data-align' small indexed pay_table to long_form df:
df_pt_index['monthly'] = df_pt['monthly']
ds['monthly'] = df_pt_index.monthly.values
# MPAY
# adjust monthly pay for any raise and last month pay percent if
# applicable
ds['mpay'] = ((ds['pay_raise'].values *
ds['mth_pcnt'].values *
ds['monthly'].values)) / 1000
ds.pop('monthly')
# CPAY
ds['cpay'] = ds.groupby('new_order')['mpay'].cumsum()
if sdict['delayed_implementation']:
ds_cols = ds.columns
# grab each imp_col (column to insert standalone or pre-implementation
# date data) and replace integrated data up through implementation
# date
for col in imp_cols:
if col in ds_cols:
arr = ds[col].values
arr[:imp_high] = col_array[arr_dict[col]][:imp_high]
ds[col] = arr
# CAT_ORDER
# global job ranking
if sdict['compute_job_category_order']:
ds['cat_order'] = f.make_cat_order(ds, table[0])
return(ds)