Source code for editor_function

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# seniority_list is an analytical tool used when seniority-based work
# groups merge. It brings modern data science to the area of labor
# integration, utilizing the powerful data analysis capabilities of the
# Python scientific computing ecosystem.

# Copyright (C) 2016-2017  Robert E. Davison, Ruby Data Systems Inc.
# Please direct inquiries to: rubydatasystems@fastmail.net

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''editor_function.py

EDITOR TOOL

requires bokeh 0.12.13+ (uses the bokeh server)

'''
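
# A minimal launch sketch (not part of the original module): the editor is
# a bokeh server application, so in addition to the notebook method shown
# in the editor() docstring below, it can be run from a small driver script.
# The file name "serve_editor.py" and its contents are an illustrative
# assumption, not code from this project.
#
#     # serve_editor.py -- run with:  bokeh serve --show serve_editor.py
#     from bokeh.io import curdoc
#     import editor_function as ef
#
#     # build the editor layout onto the current bokeh server document
#     ef.editor(curdoc())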

import numpy as np
import pandas as pd
import os
import sys
import pickle
from functools import partial
from collections import OrderedDict as od
from types import SimpleNamespace as sn
import scipy.stats as st
from scipy.signal import savgol_filter as sf
from numpy.polynomial import Polynomial as poly

from bokeh.plotting import figure
from bokeh.layouts import column, row, layout
from bokeh.models import ColumnDataSource, DataRange1d, \
    Span, Panel, Tabs, Label, NumeralTickFormatter, \
    DatetimeTickFormatter, HoverTool, CrosshairTool
from bokeh.models.layouts import Spacer
from bokeh.models.widgets import Slider, Button, Select, \
    RangeSlider, TextInput, CheckboxGroup
from bokeh.models.glyphs import Line
from bokeh.models.annotations import BoxAnnotation
from bokeh.transform import jitter

import functions as f
from matplotlib_charting import filter_ds


class Data():

    def __init__(self, data=None):
        self.data = data

    def update_data(self, d):
        self.data = d


class PropOrder():

    def __init__(self, list_order=None, name=None):
        self.list_order = list_order
        self.name = name

    def update_order(self, new_order):
        self.list_order = new_order

    def update_name(self, new_name):
        self.name = new_name


class Kwargs():

    def __init__(self, kdict=None):
        self.kdict = kdict
        if self.kdict is None:
            self.kdict = {}

    def update(self, other_dict):
        self.kdict.update(other_dict)

    def add(self, key, value):
        self.kdict[key] = value

    def remove(self, key):
        self.kdict.pop(key)

    def clear(self):
        self.kdict.clear()
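
# Illustrative usage sketch (hypothetical values, not part of the module):
# these small holder classes wrap mutable state so that the many bokeh
# callbacks defined inside editor() below can share and update the same
# objects in place.
#
#     filt_df = Data()                       # empty holder
#     filt_df.update_data(pd.DataFrame())    # later callbacks see new data
#
#     proposal = PropOrder()
#     proposal.update_name('edit')
#
#     polys = Kwargs()
#     polys.add('line_width', 15)            # kdict == {'line_width': 15}
#     polys.update({'line_alpha': 0.7})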
def editor(doc, poly_dim=15, ema_len=25,
           savgol_window=35, savgol_fit=1,
           animate_speed=350,
           plot_width=1100, plot_height=500,
           strip_eg_height=50,
           start_dot_size=4.75, max_dot_size=25,
           start_marker_alpha=.65,
           marker_edge_color=None,
           marker_edge_width=0.0):
    '''create the editor tool

    use the following code to run within the notebook:

    .. code:: python

        import editor_function as ef
        from functools import partial
        from bokeh.io import show, output_notebook
        from bokeh.application.handlers import FunctionHandler
        from bokeh.application import Application

        output_notebook()

        handler = FunctionHandler(partial(ef.editor,
                                          # optional kwargs,
                                          ))
        app = Application(handler)
        show(app)

    inputs
        doc (variable)
            a variable representing the bokeh document, do not modify
        poly_dim (integer)
            the order of the polynomial fit line
        ema_len (integer)
            the smoothing length to use when constructing the exponential
            moving average line
        savgol_window (positive odd integer)
            Savitzky-Golay filter window length
        savgol_fit (integer)
            The order of the polynomial used to fit the samples.  This
            value must be less than the savgol_window value.
        animate_speed (integer)
            Number of milliseconds between each animated month display
        plot_width (integer)
            width of main and density charts in pixels
        plot_height (integer)
            height of main chart in pixels
        strip_eg_height (integer)
            height allotted for each employee group when constructing the
            density chart
        start_dot_size (float)
            initial scatter marker size for main chart
        max_dot_size (integer)
            maximum scatter marker size for the main chart display (sets
            the upper limit of the size sliders)
        start_marker_alpha (float)
            initial scatter marker alpha (transparency) for main chart
            display
        marker_edge_color (color value string or None)
            color of the scatter marker edges for the main chart when the
            marker edge width value is greater than zero
        marker_edge_width (float)
            width of the scatter marker edges when marker_edge_color is
            not None
    '''
    # ------START variable assignment------------------------------
    try:
        settings_dict = pd.read_pickle('dill/dict_settings.pkl')
        color_dict = pd.read_pickle('dill/dict_color.pkl')
    except OSError:
        print('dict_settings.pkl and/or dict_color.pkl not found ' +
              '\nperhaps run build_program_files.py?')

    # the editor dictionary contains values representing the current state
    # of widget values and other variables.
    # The values are stored as a pickled dictionary file between sessions.
    # The editor dictionary is converted to a SimpleNamespace object
    # for use within the routine.
    # This way dot notation and global access is provided.
# sn is the alias for SimpleNamespace ed = sn(**pd.read_pickle('dill/editor_dict.pkl')) # grab proposal names for sel_base and sel_proposal dropdowns p_list = list(pd.read_pickle('dill/proposal_names.pkl').proposals.values) # limit proposal names to 10 characters to maintain layout integrity p_list = [x[:10] for x in p_list] # add hybrid if a hybrid dataset exists if os.path.exists('dill/ds_hybrid.pkl'): p_list.append('hybrid') # make a list for baseline selection (add standalone) base_p_list = [p for p in p_list if p != 'edit'] base_p_list.append('standalone') # add edit to p_list if 'edit' not in p_list: p_list.append('edit') max_month = ed.num_of_months mth_str_list = list(np.arange(0, max_month).astype(str)) # date list for animation label background date_list = list(pd.date_range(start=settings_dict['starting_date'], periods=max_month, freq='M')) date_list = [x.strftime('%Y %b') for x in date_list] # cover the possibility of rgba values in eg_color_dict values: eg_cdict = f.convert_to_hex(color_dict['eg_color_dict']) eg_list = list(eg_cdict.keys()) # used for stripplot source (see callbacks update_scat_size_p2/alpha2) num_dots = ed.total_count str_eg_list = [str(eg) for eg in eg_list] # desc = Div(text=open(os.path.join(os.path.dirname(__file__), # 'description.html')).read(), # width=800) # slider steps for marker size and alpha size_step = .25 alpha_step = .025 strip_height = len(eg_list) * strip_eg_height aux_slider_height = 160 aux_slider_width = 22 panel1_width = 460 panel2_width = max(450, 210 + (2 * aux_slider_width * len(eg_list))) slider_edit_width = plot_width - 80 all_colors = color_list() alphas = alpha_list() widths = line_widths() # layout variables controls_height = 220 chart_sel_height = 140 but_space_width = 50 but_save_width = 260 sel_height = 40 sel_width = 95 main_but_width = 120 toggle_but_width = 25 toggle_space_width = 20 toggle_center_width = 65 but_height = 35 # squeeze tab drop_dir_dict = {'u >>': 'u', '<< d': 'd'} incr_dir_dict = {'u >>': -1, '<< d': 1} # these items are referenced when datasets are created # baseline datasets are created and stored with the RUN_SCRIPTS notebook # edited datasets are created with the editor tool for analysis cond_dict = {'none': [], 'prex': ['prex'], 'count': ['count'], 'ratio': ['ratio'], 'pc': ['prex', 'count'], 'pr': ['prex', 'ratio'], 'cr': ['count', 'ratio'], 'pcr': ['prex', 'count', 'ratio']} pcnt_cols = ['spcnt', 'lspcnt'] float_cols = ['jobp', 'mpay', 'cpay', 'ylong', 'mlong', 'age'] date_cols = ['date', 'doh', 'ldate', 'retdate'] no_invert = ['mnum', 'date', 'year', 'retdate', 'doh', 'ldate', 'scale', 's_lmonths', 'age', 'job_count', 'mlong', 'ylong', 'mpay', 'cpay'] p1_tools = 'pan, box_zoom, wheel_zoom, reset, undo, redo, save' p2_tools = 'wheel_zoom, box_zoom, reset, save' # Select widget arguments sel_size_kwargs = {'width': sel_width, 'height': sel_height} # density tab aux_slider_kwargs = {'height': aux_slider_height, 'width': aux_slider_width, 'direction': 'rtl', 'orientation': 'vertical', 'tooltips': False, 'show_value': False} size_alpha_kwargs = {'width': 30, 'height': 30} # extra filters and display tabs opers = ['<', '<=', '==', '!=', '>=', '>'] opers2 = opers + [''] # extra filters options attr_list = ['', 'cat_order', 'jobp', 'jnum', 'mnum', 'eg', 'date', 'ldate', 'doh', 'retdate', 'ylong', 'mlong', 'sg', 'age', 'scale', 's_lmonths', 'lnum', 'snum', 'mnum', 'rank_in_job', 'mpay', 'cpay'] # add or remove keys and values here for hover selection generation hdict = {0: ('lname', '@lname'), 1: ('empkey', 
'@empkey'), 2: ('ldate', '@ldate{%F}'), 3: ('retdate', '@retdate{%F}'), 4: ('spcnt', '@spcnt{.000}'), 5: ('ylong', '@ylong{0.00}'), 6: ('age', '@age{0.0}')} # default string for tooltip formatting # the tuples from the dictionary above are added as appropriate for # proper hover names and value formatting further in the routine html_str = ('<div>' + '<span style=' + '"font-size: 13px; font-weight: bold; ' + 'color: @c;">%s:</span>' + '<span style="font-size: 13px;">%s</span>' + '</div>') # display attribute options display_attrs = ['jobp', 'cat_order', 'spcnt', 'lspcnt', 'jnum', 'mpay', 'cpay', 'snum', 'lnum', 'ylong', 'mlong', 'age', 's_lmonths', 'ldate', 'doh'] # size_alpha tab vars sl_size_dict = {} sl_alpha_dict = {} slider_list = [] # plot_note label and calc_note label arguments note_kwargs = dict(x=40, y=40, x_units='screen', y_units='screen', border_line_color='black', border_line_alpha=.5, background_fill_alpha=1.0, text_font_size='15pt', visible=False) plot_kwargs = dict(text='..filtering data... ', background_fill_color='#ffcc80', **note_kwargs) calc_kwargs = dict(text='..calculating new dataset... ', background_fill_color='#99ddff', **note_kwargs) # ------END variable assignment--------------------------------- # ------START widget declarations------------------------------- # squeeze tab sel_sqz_type = Select(options=['log', 'slide'], value=ed.sel_sqz_type, title='sqz type', **sel_size_kwargs) sel_emp_grp = Select(options=str_eg_list, value=ed.sel_emp_grp, title='emp group', **sel_size_kwargs) sel_sqz_dir = Select(options=['u >>', '<< d'], value=ed.sel_sqz_dir, title='sqz dir', **sel_size_kwargs) slider_squeeze = Slider(start=1, end=400, value=ed.slider_squeeze, step=1, title='squeeze', width=450, height=40, bar_color='#ffe6cc') but_0add = Button(label='<', width=toggle_but_width) but_0sub = Button(label='>', width=toggle_but_width) but_squeeze = Button(label='SQUEEZE', width=main_but_width, height=but_height, button_type='success') but_1add = Button(label='<', width=toggle_but_width) but_1sub = Button(label='>', width=toggle_but_width) # extra filters tab sel_filt1 = Select(options=attr_list, value=ed.sel_filt1, title='Filter 1', width=115, height=sel_height) sel_filt2 = Select(options=attr_list, value=ed.sel_filt2, title='Filter 2', width=115, height=sel_height) sel_filt3 = Select(options=attr_list, value=ed.sel_filt3, title='Filter 3', width=115, height=sel_height) sel_oper1 = Select(options=opers2, value=ed.sel_oper1, title='Oper 1', **sel_size_kwargs) sel_oper2 = Select(options=opers2, value=ed.sel_oper2, title='Oper 2', **sel_size_kwargs) sel_oper3 = Select(options=opers2, value=ed.sel_oper3, title='Oper 3', **sel_size_kwargs) txt_input1 = TextInput(value=ed.txt_input1, title='Val 1', height=sel_height, width=145) txt_input2 = TextInput(value=ed.txt_input2, title='Val 2', height=sel_height, width=145) txt_input3 = TextInput(value=ed.txt_input3, title='Val 3', height=sel_height, width=145) # animate tab slider_animate = Slider(start=0, end=max_month - 1, value=int(ed.sel_mth_num), step=1, title='Month', width=350, orientation='horizontal', tooltips=False, show_value=True, bar_color='#a6a6a6') but_play = Button(label='► Play', width=90) but_reset = Button(label='Reset', width=90) # This commented section is on hold for future development... 
# chk_trails = CheckboxGroup(labels=['show_trails'], # active=ed.chk_trails, # height=35, width=130, inline=False) # trails_list = ['all'] # trails_list.extend(mth_str_list) # sel_trails = Select(options=trails_list, # value=ed.sel_trails, title='trail_mths', # width=sel_width, height=sel_height) but_fwd = Button(label='FWD', width=90) but_back = Button(label='BACK', width=90) but_refresh = Button(label='refresh size_alpha', width=120) label = Label(x=20, y=plot_height - 150, x_units='screen', y_units='screen', text='', text_alpha=.25, text_color='#b3b3b3', text_font_size='70pt') # proposal_save tab but_save_edit = Button(label='SAVE EDITED DATASET', button_type='warning', width=but_save_width) but_save_order = Button(label='SAVE EDITED ORDER to proposals.xlsx', button_type='danger', width=but_save_width) sel_base = Select(options=base_p_list, value=ed.sel_base, title='baseline:', width=sel_width, height=sel_height + 5) condition_options = list(cond_dict.keys()) sel_cond = Select(options=condition_options, value=ed.sel_cond, title='conditions:', width=sel_width, height=sel_height + 5) sel_proposal = Select(options=p_list, value=ed.sel_proposal, title='proposal:', **sel_size_kwargs) # center column sel_measure = Select(options=display_attrs, value=ed.sel_measure, title='display attr:', width=sel_width, height=sel_height + 15) but_calc = Button(label='CALC', width=sel_width + 12, height=but_height, button_type='primary') but_plot = Button(label='PLOT', width=sel_width + 12, height=but_height, button_type='warning') # display tab chk_filter = CheckboxGroup(labels=['use extra filters', 'at_retire_only'], active=ed.chk_filter, height=35, width=130, inline=False) sel_mth_oper = Select(options=opers, value=ed.sel_mth_oper, title='month oper', **sel_size_kwargs) sel_mth_num = Select(options=mth_str_list, value=ed.sel_mth_num, title='month num', **sel_size_kwargs) chk_display = CheckboxGroup(labels=['scatter', 'poly_fit', 'mean', 'savgol'], active=ed.chk_display, height=40, width=70, inline=False) sel_ytype = Select(options=['diff', 'abs'], value=ed.sel_ytype, title='ytype', **sel_size_kwargs) sel_xtype = Select(options=['prop_s', 'prop_r', 'pcnt_s', 'pcnt_r'], value=ed.sel_xtype, title='xtype', **sel_size_kwargs) # size_alpha tab: for eg in eg_list: sl_size_dict[eg] = Slider(start=.5, end=max_dot_size, value=start_dot_size, step=size_step, title='S', bar_color=eg_cdict[eg], **aux_slider_kwargs) sl_alpha_dict[eg] = Slider(start=0.0, end=1.0, value=start_marker_alpha, step=alpha_step, title='A', bar_color=eg_cdict[eg], **aux_slider_kwargs) slider_list.extend([sl_size_dict[eg], sl_alpha_dict[eg]]) but_slider_reset = Button(label='Reset', width=50) but_slider_big = Button(label='S >', **size_alpha_kwargs) but_slider_sml = Button(label='< S', **size_alpha_kwargs) but_slider_aup = Button(label='A >', **size_alpha_kwargs) but_slider_adn = Button(label='< A', **size_alpha_kwargs) # grid_bg tab sel_bgc = Select(options=all_colors, value=ed.sel_bgc, title='chart / edit_fill', width=115, height=sel_height) sel_gridc = Select(options=all_colors, value=ed.sel_gridc, title='grid / edit_line', width=115, height=sel_height) sel_bgc_alpha = Select(options=alphas, value=ed.sel_bgc_alpha, title='alpha', width=70, height=sel_height) sel_gridc_alpha = Select(options=alphas, value=ed.sel_gridc_alpha, title='alpha', width=70, height=sel_height) but_reset_colors = Button(label='Reset', width=60) chk_minor_grid = CheckboxGroup(labels=['minor grid lines'], active=ed.chk_minor_grid) chk_color_apply = 
CheckboxGroup(labels=['chart bg/grid', 'edit zone'], active=ed.chk_color_apply, height=50) sel_box_line_width = Select(options=widths, value=ed.box_line_width, title='edit_line_width', width=70, height=sel_height) # hover tab chk_hover_on = CheckboxGroup(labels=['hover ON'], active=ed.chk_hover_on, width=150) # get column names from hdict (first value of each tuple) hover_labels = [val[0] for val in hdict.values()] chk_hover_sel = CheckboxGroup(labels=hover_labels, active=ed.chk_hover_sel, width=120) # density tab (stripplot): slider_strip_size = Slider(start=.05, end=15.0, value=ed.p2_marker_size, step=.05, title='S', height=40, width=200, tooltips=False, show_value=True, bar_color='#e6e6e6') slider_strip_alpha = Slider(start=.025, end=1.0, value=ed.p2_marker_alpha, step=.025, title='A', height=40, width=200, tooltips=False, show_value=True, bar_color='#e6e6e6') slider_edit_zone = RangeSlider(start=0.0, end=ed.ez_end, value=(float(ed.x_low), float(ed.x_high)), step=ed.ez_step, title='edit range values', width=slider_edit_width, bar_color='#a6a6a6', direction='rtl', show_value=True) plot_note = Label(**plot_kwargs) calc_note = Label(**calc_kwargs) # Spacer Widgets................... # layout column spacers (between left, center, and right controls) spacer_controls1 = Spacer(width=50) spacer_controls2 = Spacer(width=50) # squeeze tab spacer_sqz_but2 = Spacer(width=but_space_width) spacer_sqz_but3 = Spacer(width=but_space_width) spacer_toggle_1 = Spacer(width=toggle_space_width) spacer_toggle_center1 = Spacer(width=toggle_center_width) spacer_toggle_center2 = Spacer(width=toggle_center_width) spacer_toggle_2 = Spacer(width=toggle_space_width) # animate tab spacer_anim1 = Spacer(width=60, height=but_height) spacer_anim_refresh = Spacer(width=60, height=but_height) spacer_anim2 = Spacer(width=60, height=but_height) # proposal_save tab spacer_top_save = Spacer(width=but_save_width, height=50) spacer_middle_save = Spacer(width=35, height=aux_slider_height) # above sel_measure dropdown (center column) spacer_top_center_col = Spacer(height=40, width=sel_width) # display tab: spacer_top_disp = Spacer(width=200, height=45) spacer_disp_mth1 = Spacer(width=35) spacer_disp_mth2 = Spacer(width=35) spacer_disp_ax1 = Spacer(width=35) spacer_disp_ax2 = Spacer(width=35) # size_alpha tab spacer_top_size_alpha = Spacer(width=50, height=50) spacer_size_buts = Spacer(width=30) spacer_alpha_buts = Spacer(width=30) # grid_bg tab spacer_linesbg_col = Spacer(width=60) spacer_linesbg_col2 = Spacer(width=5) spacer_top_color_apply = Spacer(width=70, height=40) spacer_linesbg_bottom = Spacer(width=75) # edit zone slider (left margin) spacer_edit = Spacer(width=40) # ------END widget declarations--------------------------------- # ------START Class instantiations------------------------------ proposal = PropOrder() diff_str = Data() filt_str = Data() skel = Data() ds_stand = Data() base_ds = Data() calc_ds = Data() idx_df = Data() filt_df = Data() strip_df = Data() reorder_df = Data() anim_df = Data() mgrps_gb = Data() filt_xax = Data() idx_xax = Data() alpha_filt_arr = Data() eg_filt_arr = Data() zero_filt_arr = Data() size_filt_arr = Data() tool_tips = Data() hover_tool = Data() crosshair_tool = Data() polys = Kwargs() means = Kwargs() savgols = Kwargs() src_dict = Kwargs() # ------figures, sources, tool classes---------------------------- p1 = figure(min_border_left=50, tools=p1_tools) p2 = figure(min_border_left=50, width=plot_width, height=strip_height, x_range=DataRange1d(flipped=True, range_padding=0.0), 
y_range=DataRange1d(flipped=True, range_padding=0.05), tools=p2_tools) source1 = ColumnDataSource(data=dict(a=[], c=[], s=[], x=[], y=[])) source2 = ColumnDataSource(data=dict(a=[], c=[], eg=[], s=[], x=[])) # -------------------------------------------------------------- box_kwargs = dict(fill_alpha=float(ed.box_fill_alpha), fill_color=ed.box_fill_color, line_color=ed.box_line_color, line_alpha=float(ed.box_line_alpha), line_width=float(ed.box_line_width), level='underlay', ) box1 = BoxAnnotation(**box_kwargs.copy()) box2 = BoxAnnotation(**box_kwargs.copy()) # ------polyfit, mean, and savgol smoothing line glyphs------- # dummy nan dict nan_dict = dict(x=np.full(1, np.nan), y=np.full(1, np.nan)) # line glyphs arguments poly_kwargs = dict(x="x", y="y", line_width=15, line_alpha=0.7) mean_kwargs = dict(x="x", y="y", line_width=6, line_alpha=0.7) savgol_kwargs = dict(x="x", y="y", line_width=8, line_alpha=0.7) for eg in eg_list: # ----make line glyphs------------------------ polys.kdict['p' + str(eg)] = Line(line_color=eg_cdict[eg], **poly_kwargs) means.kdict['m' + str(eg)] = Line(line_color=eg_cdict[eg], **mean_kwargs) savgols.kdict['s' + str(eg)] = Line(line_color=eg_cdict[eg], **savgol_kwargs) # ----line glyphs data source instantiation---- src_dict.kdict['sp' + str(eg)] = \ ColumnDataSource(data=nan_dict.copy()) src_dict.kdict['sm' + str(eg)] = \ ColumnDataSource(data=nan_dict.copy()) src_dict.kdict['ss' + str(eg)] = \ ColumnDataSource(data=nan_dict.copy()) # hover and crosshair tools hover_tool.data = HoverTool(formatters={'ldate': 'datetime', 'retdate': 'datetime'}, show_arrow=False) hover_cols = Data() crosshair_tool.data = CrosshairTool(dimensions='both', line_alpha=.3, line_color='red', line_width=.75) # ------END Class instantiations------------------------------ # ------START Callback functions------------------------------ # squeeze source def sqz_type_change(attr, old, new): ed.sel_sqz_type = new def emp_group_change(attr, old, new): ed.sel_emp_grp = new def sqz_dir_change(attr, old, new): ed.sel_sqz_dir = new def update_squeeze(attr, old, new): ed.slider_squeeze = new # toggle line adjustment: def line1_add(): low_slider = ed.x_low high_slider = ed.x_high if ed.sel_xtype in ['prop_s', 'prop_r']: high_slider += 1 else: if high_slider < 1.0: high_slider += .001 slider_edit_zone.value = (low_slider, high_slider) def line1_sub(): low_slider = ed.x_low high_slider = ed.x_high if high_slider > low_slider: if ed.sel_xtype in ['prop_s', 'prop_r']: high_slider -= 1 else: high_slider -= .001 slider_edit_zone.value = (low_slider, high_slider) def line0_add(): low_slider = ed.x_low high_slider = ed.x_high if low_slider < high_slider: if ed.sel_xtype in ['prop_s', 'prop_r']: low_slider += 1 else: low_slider += .001 slider_edit_zone.value = (low_slider, high_slider) def line0_sub(): low_slider = ed.x_low high_slider = ed.x_high if ed.sel_xtype in ['prop_s', 'prop_r']: low_slider -= 1 else: if low_slider > 0.0: low_slider -= .001 slider_edit_zone.value = (low_slider, high_slider) def perform_squeeze(): # make new order for sripplot and/or skeleton if ed.sel_proposal != 'edit': sel_proposal.value = 'edit' squeeze_eg = int(ed.sel_emp_grp) ed.x_low = slider_edit_zone.value[0] ed.x_high = slider_edit_zone.value[1] low_val = f.cross_val(filt_xax.data, ed.x_low, idx_xax.data) high_val = f.cross_val(filt_xax.data, ed.x_high, idx_xax.data) if sel_sqz_type.value == 'log': direction = drop_dir_dict[ed.sel_sqz_dir] factor = slider_squeeze.value * .005 squeezer = f.squeeze_logrithmic(reorder_df.data, 
squeeze_eg, low_val, high_val, direction=direction, put_segment=1, log_factor=factor) if sel_sqz_type.value == 'slide': incr_dir_correction = incr_dir_dict[ed.sel_sqz_dir] increment = slider_squeeze.value * incr_dir_correction squeezer = f.squeeze_increment(reorder_df.data, squeeze_eg, low_val, high_val, increment=increment) strip_df.update_data(reorder_df.data.copy()) strip_df.data['prop_s'] = squeezer strip_df.data.drop(['new_order'], axis=1, inplace=True) for col in ['c', 'eg']: strip_df.data[col] = source2.data[col] strip_df.data['a'] = ed.p2_marker_alpha strip_df.data['s'] = ed.p2_marker_size strip_df.data.sort_values('prop_s', inplace=True) reorder_df.data['new_order'] = squeezer reorder_df.data.sort_values('new_order', inplace=True) reorder_df.data['new_order'] = np.arange(1, len(reorder_df.data) + 1, dtype='int') proposal.update_order(reorder_df.data[['new_order']]) update_stripplot() # extra filters def update_sel_filt1(attr, old, new): ed.sel_filt1 = new def update_sel_filt2(attr, old, new): ed.sel_filt2 = new def update_sel_filt3(attr, old, new): ed.sel_filt3 = new def update_oper1(attr, old, new): ed.sel_oper1 = new def update_oper2(attr, old, new): ed.sel_oper2 = new def update_oper3(attr, old, new): ed.sel_oper3 = new def update_txt_input1(attr, old, new): ed.txt_input1 = new def update_txt_input2(attr, old, new): ed.txt_input2 = new def update_txt_input3(attr, old, new): ed.txt_input3 = new # animate def animate_source(attr, old, new): use_hover = ed.chk_hover_on and ed.chk_hover_sel if mgrps_gb.data: hover_dict = {} # try to find data for selected month group, if none found, stop try: mth = mgrps_gb.data.get_group(new) except: label.text = 'NO DATA' return x = mth[ed.sel_xtype].values y = mth[ed.sel_ytype].values c = mth['c'].values a = mth['a'].values s = mth['s'].values eg = mth['eg'].values s1_dict = {'x': x, 'y': y, 'c': c, 'a': a, 's': s, 'eg': eg} if use_hover: for idx in ed.chk_hover_sel: col = hdict[idx][0] if col != ed.sel_measure: hover_dict[col] = mth[col].values s1_dict.update(hover_dict) source1.update(data=s1_dict) label.text = date_list[new] sel_mth_num.value = str(new) # reset "running" values for edit zone value conversion using # the cross_val function (use current month values, not # the values from the last time the "plot" button was used) if ed.sel_xtype in ['prop_r', 'pcnt_r']: filt_xax.data = x idx_xax.data = mth['prop_s'].values def animate(): global cb_id box1.right, box1.left = None, None if but_play.label == '► Play': but_play.label = '❚❚ Pause' cb_id = doc.add_periodic_callback(animate_update, animate_speed) else: but_play.label = '► Play' doc.remove_periodic_callback(cb_id) def reset(): box1.right, box1.left = None, None slider_animate.value = 0 sel_mth_num.value = '0' sel_mth_oper.value = '==' def refresh(): eg_arr = anim_df.data['eg'].values for eg, slider in sl_size_dict.items(): np.put(anim_df.data['s'], np.where(eg_arr == eg)[0], slider.value) for eg, slider in sl_alpha_dict.items(): np.put(anim_df.data['a'], np.where(eg_arr == eg)[0], slider.value) # capture the new size and alpha values for the month groupby data mgrps_gb.update_data(anim_df.data.groupby('mnum')) def fwd1(): box1.right, box1.left = None, None new_val = slider_animate.value + 1 if new_val < max_month: slider_animate.value = new_val sel_mth_num.value = str(new_val) def back1(): box1.right, box1.left = None, None new_val = slider_animate.value - 1 if new_val >= 0: slider_animate.value = new_val sel_mth_num.value = str(new_val) def animate_update(): box1.right, box1.left = 
None, None mth = slider_animate.value + 1 if mth > max_month: mth = 0 slider_animate.value = mth sel_mth_num.value = str(mth) # def prepare_animate(attr, old, new): # pass # future development...trails # proposal_save # grab the widget values, create a dictionary, pickle def store_vals(): with open('dill/editor_dict.pkl', 'wb') as handle: pickle.dump(vars(ed), handle, protocol=pickle.HIGHEST_PROTOCOL) def save_edited_df(): store_vals() calc_ds.data.to_pickle('dill/ds_edit.pkl') save_edited_order() def save_edited_order(): reorder_df.data[['new_order']].to_pickle('dill/p_edit.pkl') def save_order_to_excel(): xl_str = 'excel/' + ed.case + '/proposals.xlsx' df = reorder_df.data[['new_order']] df = df.reset_index()[['empkey']] df.index = df.index + 1 df.index.name = 'order' ws_dict = pd.read_excel(xl_str, index_col=0, sheet_name=None) ws_dict['edit'] = df with pd.ExcelWriter(xl_str, engine='xlsxwriter') as writer: for ws_name, df_sheet in ws_dict.items(): df_sheet.to_excel(writer, sheet_name=ws_name) def base_change(attr, old, new): ed.sel_base = new def cond_change(attr, old, new): ed.sel_cond = new def find_order(): try: # look for edit list or compare list (determined by sel_proposal) if ed.sel_proposal == 'edit': # edit order prop_name = 'edit' if proposal.list_order is not None: df_order = proposal.list_order else: df_order = pd.read_pickle('dill/p_edit.pkl') else: # reset to compare order prop_name = ed.sel_proposal df_order = pd.read_pickle('dill/p_' + ed.sel_proposal + '.pkl') except OSError: # above not found, default to first found df_order, prop_name = use_first_proposal_found('edit') proposal.update_order(df_order) proposal.update_name(prop_name) def proposal_change(attr, old, new): ed.sel_proposal = new # set the proposal.list_order find_order() # Center Column def measure_change(attr, old, new): ed.sel_measure = new def calc_button(): label.text = '' calc_note.visible = True find_order() calc_dataset() join_dataset() update_main_plot() update_stripplot() calc_note.visible = False def plot_button(): label.text = '' plot_note.visible = True join_dataset() update_main_plot() plot_note.visible = False def calc_dataset(): # this routine creates a new integrated dataset based on a given # list order and list of job assignment conditions # to change calculation order, # update the proposal.list_order property... # save the input list order (not every time a squeeze is done) if # the edit proposal is selected (sel_proposal). # if the proposal is not edit, the order column is 'idx', # not 'new_order'. # This avoids saving a non-edit proposal list as an edited list. if 'new_order' in proposal.list_order.columns: proposal.list_order.to_pickle('dill/p_edit.pkl') # save the widget settings store_vals() # calling the main integrated dataset generation routine... 
ds = make_dataset(proposal_name=proposal.name, df_order=proposal.list_order, conditions=cond_dict[ed.sel_cond], ds=skel.data, ds_stand=ds_stand.data) calc_ds.update_data(ds) # set to instance of Data class def update_axis_formats(): if len(filt_df.data): if ed.sel_ytype == 'abs': if ed.sel_measure in ['cpay', 'mpay', 'ylong', 'mlong', 'age', 'scale', 's_lmonths']: ed.cht_yflipped = False else: ed.cht_yflipped = True else: ed.cht_yflipped = False p1.y_range.update(flipped=ed.cht_yflipped) if ed.sel_measure in pcnt_cols: p1.yaxis[0].formatter = NumeralTickFormatter(format="0.0%") else: if ed.sel_measure in float_cols: p1.yaxis[0].formatter = NumeralTickFormatter(format="0.0") elif ed.sel_measure in date_cols: p1.yaxis[0].formatter = DatetimeTickFormatter(years=['%Y']) else: p1.yaxis[0].formatter = NumeralTickFormatter(format="0") if ed.sel_xtype in ['pcnt_s', 'pcnt_r']: p1.xaxis[0].formatter = NumeralTickFormatter(format="0.0%") if (slider_edit_zone.value[1] > 1 or slider_edit_zone.value[1] > max(filt_xax.data)): ed.x_high = .65 * max(filt_xax.data) ed.x_low = .45 * max(filt_xax.data) ed.ez_step = .001 else: p1.xaxis[0].formatter = NumeralTickFormatter(format="0") if (slider_edit_zone.value[1] <= 1 or slider_edit_zone.value[1] > max(filt_xax.data)): ed.x_high = int(.65 * max(filt_xax.data)) ed.x_low = int(.45 * max(filt_xax.data)) ed.ez_step = 1 slider_edit_zone.update(end=max(filt_xax.data), step=ed.ez_step, value=(ed.x_low, ed.x_high)) def join_dataset(): label.text = '' ret_only = 1 in ed.chk_filter extra_filter = 0 in ed.chk_filter base_cols = [ed.sel_measure, 'mnum'] calc_ds_cols = [ed.sel_measure, 'mnum', 'new_order', 'eg'] if ret_only: calc_ds_cols.append('ret_mark') # if extra filters are to be used, the appropriate columns are # added to the dataframe if extra_filter: # this means filter is checked a1 = ed.sel_filt1 a2 = ed.sel_filt2 a3 = ed.sel_filt3 o1 = ed.sel_oper1 o2 = ed.sel_oper2 o3 = ed.sel_oper3 v1 = ed.txt_input1 v2 = ed.txt_input2 v3 = ed.txt_input3 # filtlist means "filter list" attr_filtlist = [a1, a2, a3] oper_filtlist = [o1, o2, o3] vals_filtlist = [v1, v2, v3] # add filter columns filt_cols = [] for i, attr in enumerate(attr_filtlist): if attr_filtlist[i] and oper_filtlist[i] and vals_filtlist[i]: filt_cols.append(attr) filt_cols = list(set(filt_cols)) calc_ds_cols = list(set().union(calc_ds_cols, filt_cols)) if ed.chk_hover_on and ed.chk_hover_sel: hover_cols.data = [] for key in ed.chk_hover_sel: col = hdict[key][0] if col != ed.sel_measure: hover_cols.data.append(col) if hover_cols.data: calc_ds_cols = list(set().union(calc_ds_cols, hover_cols.data)) # ----------------------------------------------------------------- # BASE DATAFRAME (not filtered) # assign base_ds - check if stored dataset must be read from disc # or current base_ds may be used if ed.sel_base == 'standalone': base_ds.data = ds_stand.data else: if ed.sel_base != ed.base_ds_name: base_ds.data = pd.read_pickle('dill/ds_' + ed.sel_base + '.pkl') ed.base_ds_name = ed.sel_base df = base_ds.data[base_cols].copy() df.rename(columns={ed.sel_measure: ed.sel_measure + '_b'}, inplace=True) # for stripplot and squeeze (month zero): data_reorder = calc_ds.data[calc_ds.data.mnum == 0][['eg']].copy() data_reorder['new_order'] = \ np.arange(len(data_reorder)).astype(int) + 1 # set the df attribute of the reorder_df Data object: reorder_df.update_data(data_reorder) # index df for range values conversion (integrated ds month zero order) idx_df.update_data(reorder_df.data[[]].copy()) idx_df.data['orig_order'] = 
np.arange(len(idx_df.data)) + 1 join_ds = calc_ds.data[calc_ds_cols].copy() # add mnum to index df.set_index('mnum', append=True, inplace=True) join_ds.set_index('mnum', append=True, inplace=True) join_ds.rename(columns={ed.sel_measure: 'abs', 'new_order': 'prop_s'}, inplace=True) # JOIN BASE and COMPARE df = df.join(join_ds) df.reset_index(level='mnum', inplace=True) df.sort_values(['mnum', 'prop_s'], inplace=True) strip_df.update_data(df[df.mnum == 0][['prop_s', 'eg']].copy()) # set up color column - Note rgba values do not work with this egs = strip_df.data['eg'].values clr = np.empty(len(strip_df.data), dtype='object') for eg in eg_list: np.put(clr, np.where(egs == eg)[0], eg_cdict[eg]) strip_df.data['c'] = clr strip_df.data['a'] = ed.p2_marker_alpha strip_df.data['s'] = ed.p2_marker_size # running (monthly) proposal list ordering df['prop_r'] = df.groupby('mnum').cumcount() + 1 prop_r = df.prop_r.values eg_vals = df.eg.values eg_denom_dict = df.groupby('eg').prop_r.max().to_dict() denoms = np.zeros(eg_vals.size) for eg in eg_list: np.put(denoms, np.where(eg_vals == eg)[0], eg_denom_dict[eg]) df['pcnt_r'] = prop_r / denoms df['pcnt_s'] = f.make_starting_val_column(df, 'pcnt_r', inplace=False) # FILTERING # ret only filter if ret_only: # this means ret_only is checked df = df[eval('(df.ret_mark == 1)')].copy() if extra_filter: df, filt_str.data = filter_ds(df, attr1=a1, oper1=o1, val1=v1, attr2=a2, oper2=o2, val2=v2, attr3=a3, oper3=o3, val3=v3) filt_str.data = ', with filter: [ ' + filt_str.data + ' ]' else: filt_str.data = '' df = add_source_columns(df) df.sort_values(by='prop_s', inplace=True) # make dataframe and groupby source for animation anim_df.data = df.copy() mgrps_gb.update_data(anim_df.data.groupby('mnum')) # month filter mnum_oper = ed.sel_mth_oper mnum_val = ed.sel_mth_num mnum_filt_str = ' '.join(['mnum', mnum_oper, mnum_val]) mnum_str = '(df.' 
+ mnum_filt_str + ')' df_display = df[eval(mnum_str)].copy() if len(df_display): filt_df.update_data(df_display) # make arrays from filt_df filt_xax.update_data(filt_df.data[ed.sel_xtype].values) idx_xax.update_data(filt_df.data['prop_s'].values) alpha_filt_arr.update_data(filt_df.data['a'].values) eg_filt_arr.update_data(filt_df.data['eg'].values) zero_filt_arr.update_data(np.full(len(filt_df.data), 0.0)) size_filt_arr.update_data(filt_df.data['s'].values) slider_edit_zone.update(end=max(filt_xax.data)) else: # if df_display is empty (through use of extra filters): label.text = 'NO DATA: mth ' + ed.sel_mth_num def add_source_columns(df): # set up color column - Note rgba values do not work with this egs = df['eg'].values clr = np.empty(len(df), dtype='object') alph = np.zeros(len(df)) sze = np.zeros(len(df)) # set colors from eg_cdict and set size and alpha from slider values for eg in eg_list: these_idx = np.where(egs == eg)[0] np.put(clr, these_idx, eg_cdict[eg]) np.put(alph, these_idx, sl_alpha_dict[eg].value) np.put(sze, these_idx, sl_size_dict[eg].value) df['c'] = clr df['a'] = alph df['s'] = sze # add "diff" column if selected by sel_ytype dropdown widget input if ed.sel_ytype == 'diff': diff_str.data = ' vs ' + ed.sel_base + ' ' if ed.sel_measure not in no_invert: df['diff'] = df[ed.sel_measure + '_b'] - df['abs'] else: df['diff'] = df['abs'] - df[ed.sel_measure + '_b'] else: diff_str.data = ' ' return df def update_main_plot(): p1.title.text = (proposal.name + diff_str.data + ed.sel_measure.upper() + ' ' + ed.sel_ytype + ' values' + filt_str.data) if 0 in ed.chk_display: acol = alpha_filt_arr.data else: acol = zero_filt_arr.data source1.data = {'x': filt_df.data[ed.sel_xtype].values, 'y': filt_df.data[ed.sel_ytype].values, 'c': filt_df.data['c'].values, 'a': acol, 's': filt_df.data['s'].values, 'eg': filt_df.data['eg'].values} if ed.chk_hover_on and ed.chk_hover_sel: for key in ed.chk_hover_sel: col = hdict[key][0] if col != ed.sel_measure: source1.add(data=filt_df.data[col].values, name=col) xl = float(ed.x_low) xh = float(ed.x_high) box1.left, box1.right = xh, xl xl2 = f.cross_val(filt_xax.data, xl, idx_xax.data) xh2 = f.cross_val(filt_xax.data, xh, idx_xax.data) box2.left, box2.right = xh2, xl2 clear_line_data() update_axis_formats() update_line_data() def make_plots(return_plots=False): p1.plot_width = plot_width # ed.cht_xsize p1.plot_height = plot_height # ed.cht_ysize p1.y_range = DataRange1d(range_padding=0.0) p1.x_range = DataRange1d(end=0.0, flipped=True, range_padding=0.0) p1.title.text = (proposal.name + diff_str.data + ed.sel_measure.upper() + ' ' + ed.sel_ytype + ' values' + filt_str.data) p1.background_fill_color = ed.sel_bgc p1.background_fill_alpha = float(ed.sel_bgc_alpha) p1.add_tools(crosshair_tool.data) p1.add_tools(hover_tool.data) p1.toolbar.active_inspect = [hover_tool.data] # p1.output_backend = 'webgl' box1.left, box1.right = ed.x_high, ed.x_low p2.background_fill_color = ed.sel_bgc p2.background_fill_alpha = float(ed.sel_bgc_alpha) # p2.output_backend = 'webgl' box2.left, box2.right = ed.x_high, ed.x_low # source1 dictionary assignment src1_dict = {'x': filt_df.data[ed.sel_xtype], 'y': filt_df.data[ed.sel_ytype], 'c': filt_df.data['c'], 'a': filt_df.data['a'], 's': filt_df.data['s'], 'eg': filt_df.data['eg']} if ed.chk_hover_on and ed.chk_hover_sel: hover_dict = {} for idx in ed.chk_hover_sel: col = hdict[idx][0] if col != ed.sel_measure: hover_dict[col] = filt_df.data[col].values src1_dict.update(hover_dict) source1.update(data=src1_dict) # source2 
dictionary assignment src2_dict = {'x': strip_df.data['prop_s'], # 'y': df_display[yval], 'c': strip_df.data['c'], 'a': strip_df.data['a'], 's': strip_df.data['s'], 'eg': strip_df.data['eg']} # set ColumnDataSource data source1.data = src1_dict source2.data = src2_dict # ------------------------------------------------------------------ p1.grid.grid_line_color = ed.sel_gridc p1.grid.grid_line_alpha = float(ed.sel_gridc_alpha) p1.toolbar.logo = None p1.grid.minor_grid_line_color = ed.sel_gridc p1.grid.minor_grid_line_alpha = ed.minor_grid_alpha p1.grid.minor_grid_line_dash = 'dotted' p1.circle('x', 'y', color='c', size='s', alpha='a', line_color=marker_edge_color, line_width=marker_edge_width, source=source1) p2.circle(x='x', y=jitter('eg', width=0.92, distribution="uniform"), color='c', size='s', alpha='a', line_color=None, source=source2) p2.yaxis[0].ticker.desired_num_ticks = len(eg_list) p2.yaxis.minor_tick_line_color = None p2.ygrid.grid_line_color = None p2.xgrid.grid_line_color = ed.sel_gridc p2.xgrid.grid_line_alpha = float(sel_gridc_alpha.value) p2.toolbar.logo = None p1.add_layout(box1) p2.add_layout(box2) # p2.add_glyph(quad2_source, quad2) add_line_glyphs(eg_list) update_line_data() update_axis_formats() # zeroline zeroline = Span(location=0, dimension='width', line_dash='dashed', line_color='red', line_width=1) p1.add_layout(zeroline) p1.add_layout(label) if return_plots: return p1, p2 # display tab def filter_change(attr, old, new): ed.chk_filter = list(chk_filter.active) if 1 not in ed.chk_filter: sel_mth_oper.value = '==' def add_line_glyphs(eg_list): for eg in eg_list: p1.add_glyph(src_dict.kdict['sp' + str(eg)], glyph=polys.kdict['p' + str(eg)]) p1.add_glyph(src_dict.kdict['sm' + str(eg)], glyph=means.kdict['m' + str(eg)]) p1.add_glyph(src_dict.kdict['ss' + str(eg)], glyph=savgols.kdict['s' + str(eg)]) def update_line_data(): chkd = set.intersection(set([1, 2, 3]), set(ed.chk_display)) # scatter markers if 0 in ed.chk_display: source1.data.update(a=alpha_filt_arr.data) else: source1.data.update(a=zero_filt_arr.data) if chkd: for eg in pd.unique(filt_df.data['eg']): eg_df = filt_df.data[filt_df.data['eg'] == eg].copy() xlvals = eg_df[ed.sel_xtype].values ylvals = eg_df[ed.sel_ytype].values idx = np.isfinite(xlvals) & np.isfinite(ylvals) xlvals = xlvals[idx] ylvals = ylvals[idx] # poly_fit if 1 in chkd: pdata = poly.fit(xlvals, ylvals, poly_dim).linspace() src_dict.kdict['sp' + str(eg)].data = \ dict(x=list(pdata[0]), y=list(pdata[1])) # mean if 2 in chkd: yma = ema(ylvals, ema_len) src_dict.kdict['sm' + str(eg)].data = dict(x=xlvals, y=yma) # Savitzky–Golay filter if 3 in chkd: sf_data = sf(ylvals, savgol_window, savgol_fit) sf_data[sf_data == np.nan] = 0 src_dict.kdict['ss' + str(eg)].data.update(x=xlvals, y=sf_data) def clear_line_data(): for eg in eg_list: # poly_fit src_dict.kdict['sp' + str(eg)].data.update(**nan_dict) # mean src_dict.kdict['sm' + str(eg)].data.update(**nan_dict) # savgol src_dict.kdict['ss' + str(eg)].data.update(**nan_dict) def ema(arr, n): """ compute an n period exponential moving average. 
""" x = np.asarray(arr) weights = np.exp(np.linspace(-1., 0., n)) weights /= weights.sum() a = np.convolve(x, weights, mode='full')[:len(x)] a[:n] = a[n] return a def display_change(attr, old, new): ed.chk_display = list(chk_display.active) clear_line_data() update_line_data() def month_oper_change(attr, old, new): ed.sel_mth_oper = new def month_num_change(attr, old, new): ed.sel_mth_num = new def ytype_change(attr, old, new): ed.sel_ytype = new def xtype_change(attr, old, new): ed.sel_xtype = new # size_alpha def reset_sliders(): for s_slider in sl_size_dict.values(): s_slider.value = start_dot_size for a_slider in sl_alpha_dict.values(): a_slider.value = start_marker_alpha def slider_big(): for s_slider in sl_size_dict.values(): if s_slider.value < max_dot_size: s_slider.value += size_step def slider_sml(): for s_slider in sl_size_dict.values(): if s_slider.value >= size_step: s_slider.value -= size_step def slider_aup(): for a_slider in sl_alpha_dict.values(): if a_slider.value <= 1 - alpha_step: a_slider.value += alpha_step def slider_adn(): for slider in sl_alpha_dict.values(): if slider.value > 0: slider.value -= alpha_step # size_alpha source def update_scat_size_p1(attr, old, new, eg): s = sl_size_dict[eg].value s_arr = np.array(source1.data['s']) eg_arr = np.array(source1.data['eg']) np.put(s_arr, np.where(eg_arr == eg)[0], s) source1.data.update({'s': s_arr}) def update_scat_alpha_p1(attr, old, new, eg): a = sl_alpha_dict[eg].value a_arr = np.array(source1.data['a']) eg_arr = np.array(source1.data['eg']) np.put(a_arr, np.where(eg_arr == eg)[0], a) source1.data.update({'a': a_arr}) # grid_bg def update_bg_color(attr, old, new): float_alpha = float(sel_bgc_alpha.value) if 0 in ed.chk_color_apply: p1.background_fill_color = sel_bgc.value p1.background_fill_alpha = float_alpha p2.background_fill_color = sel_bgc.value p2.background_fill_alpha = float_alpha ed.sel_bgc = sel_bgc.value ed.sel_bgc_alpha = sel_bgc_alpha.value if 1 in ed.chk_color_apply: box1.fill_color = sel_bgc.value box1.fill_alpha = float_alpha box2.fill_color = sel_bgc.value box2.fill_alpha = float_alpha ed.box_fill_color = sel_bgc.value ed.box_fill_alpha = float_alpha def update_grid_color(attr, old, new): float_alpha = float(sel_gridc_alpha.value) if 0 in ed.chk_color_apply: p1.grid.grid_line_color = sel_gridc.value p1.grid.grid_line_alpha = float_alpha p2.xgrid.grid_line_color = sel_gridc.value p2.xgrid.grid_line_alpha = float_alpha ed.sel_gridc = sel_gridc.value ed.sel_gridc_alpha = sel_gridc_alpha.value if 1 in ed.chk_color_apply: box1.line_color = sel_gridc.value box1.line_alpha = float_alpha box2.line_color = sel_gridc.value box2.line_alpha = float_alpha ed.box_line_color = sel_gridc.value ed.box_line_alpha = float_alpha def reset_colors(): temp_chk_color_apply = ed.chk_color_apply ed.chk_color_apply = [0, 1] sel_bgc.value = 'White' sel_bgc_alpha.value = '.10' sel_gridc.value = 'Gray' sel_gridc_alpha.value = '.20' ed.sel_bgc = 'White' ed.sel_bgc_alpha = '.10' ed.sel_gridc = 'Gray' ed.sel_gridc_alpha = '.20' if chk_minor_grid.active: p1.grid.minor_grid_line_color = 'Gray' p1.grid.minor_grid_line_alpha = .20 else: p1.grid.minor_grid_line_alpha = 0.0 sel_box_line_width.value = '1.0' box1.line_color = 'black' box1.line_alpha = .8 box2.line_color = 'black' box2.line_alpha = .8 box1.fill_color = 'black' box1.fill_alpha = .05 box2.fill_color = 'black' box2.fill_alpha = .05 ed.chk_color_apply = temp_chk_color_apply def minor_grid(attr, old, new): if chk_minor_grid.active: p1.grid.minor_grid_line_color = ed.sel_gridc 
p1.grid.minor_grid_line_alpha = float(ed.sel_gridc_alpha) else: p1.grid.minor_grid_line_alpha = 0.0 ed.chk_minor_grid = list(chk_minor_grid.active) def color_apply(attr, old, new): ed.chk_color_apply = list(chk_color_apply.active) def edit_line_width(attr, old, new): ed.box_line_width = sel_box_line_width.value box1.line_width = float(new) box2.line_width = float(new) # hover def hover_tool_control(attr, old, new): ed.chk_hover_sel = list(chk_hover_sel.active) ed.chk_hover_on = list(chk_hover_on.active) manage_hover_tool() # make html for tooltip formatting def manage_hover_tool(): if ed.chk_hover_on and ed.chk_hover_sel: pre_div = ('<div style="background-color:' + 'rgba(0, 0, 0, 0.03);' + 'overflow: auto;">') mid_div = '' suf_div = '</div>' for key in ed.chk_hover_sel: col = hdict[key][0] if col != ed.sel_measure: mid_div += html_str % (col, ' ' + hdict[key][1]) hover_tool.data.tooltips = pre_div + mid_div + suf_div else: hover_tool.data.tooltips = None tool_tips.data = None hover_cols.data = [] # density (jitter stripplot) def update_stripplot(): source2.data = dict(a=[], c=[], eg=[], s=[], x=[]) source2.data = dict(a=strip_df.data['a'], c=strip_df.data['c'], eg=strip_df.data['eg'], s=strip_df.data['s'], x=strip_df.data['prop_s']) def update_scat_size_p2(attr, old, new): size_arr = np.full(num_dots, new) source2.data.update({'s': size_arr}) ed.p2_marker_size = new def update_scat_alpha_p2(attr, old, new): size_arr = np.full(num_dots, new) source2.data.update({'a': size_arr}) ed.p2_marker_alpha = new # edit range def update_edit_range(attr, old, new): # Get slider values xl = slider_edit_zone.value[0] xh = slider_edit_zone.value[1] box1.left, box1.right = xh, xl xl2 = f.cross_val(filt_xax.data, xl, idx_xax.data) xh2 = f.cross_val(filt_xax.data, xh, idx_xax.data) box2.left, box2.right = xl2, xh2 # update editor dict namespace ed.x_low = xl ed.x_high = xh # -----END Callback functions------------------------------- # -----START Callback actions------------------------------- # squeeze sel_sqz_type.on_change('value', sqz_type_change) sel_sqz_dir.on_change('value', sqz_dir_change) sel_emp_grp.on_change('value', emp_group_change) slider_squeeze.on_change('value', update_squeeze) but_squeeze.on_click(perform_squeeze) but_0add.on_click(line0_add) but_0sub.on_click(line0_sub) but_1add.on_click(line1_add) but_1sub.on_click(line1_sub) # extra filters sel_filt1.on_change('value', update_sel_filt1) sel_filt2.on_change('value', update_sel_filt2) sel_filt3.on_change('value', update_sel_filt3) sel_oper1.on_change('value', update_oper1) sel_oper2.on_change('value', update_oper2) sel_oper3.on_change('value', update_oper3) txt_input1.on_change('value', update_txt_input1) txt_input2.on_change('value', update_txt_input2) txt_input3.on_change('value', update_txt_input3) # animate: but_play.on_click(animate) but_reset.on_click(reset) but_refresh.on_click(refresh) but_back.on_click(back1) but_fwd.on_click(fwd1) slider_animate.on_change('value', animate_source) # commented for future development... 
# chk_trails.on_change('active', prepare_animate) # proposal_save but_save_edit.on_click(save_edited_df) but_save_order.on_click(save_order_to_excel) sel_base.on_change('value', base_change) sel_cond.on_change('value', cond_change) sel_proposal.on_change('value', proposal_change) # center column sel_measure.on_change('value', measure_change) but_plot.on_click(plot_button) but_calc.on_click(calc_button) # display chk_filter.on_change('active', filter_change) chk_display.on_change('active', display_change) sel_mth_oper.on_change('value', month_oper_change) sel_mth_num.on_change('value', month_num_change) sel_ytype.on_change('value', ytype_change) sel_xtype.on_change('value', xtype_change) # size_alpha for eg, slider in sl_size_dict.items(): slider.on_change('value', partial(update_scat_size_p1, eg=eg)) for eg, slider in sl_alpha_dict.items(): slider.on_change('value', partial(update_scat_alpha_p1, eg=eg)) but_slider_reset.on_click(reset_sliders) but_slider_big.on_click(slider_big) but_slider_sml.on_click(slider_sml) but_slider_aup.on_click(slider_aup) but_slider_adn.on_click(slider_adn) # grid_bg sel_bgc.on_change('value', update_bg_color) sel_bgc_alpha.on_change('value', update_bg_color) sel_gridc.on_change('value', update_grid_color) sel_gridc_alpha.on_change('value', update_grid_color) but_reset_colors.on_click(reset_colors) chk_minor_grid.on_change('active', minor_grid) chk_color_apply.on_change('active', color_apply) sel_box_line_width.on_change('value', edit_line_width) # hover chk_hover_on.on_change('active', hover_tool_control) chk_hover_sel.on_change('active', hover_tool_control) # density (stripplot): slider_strip_size.on_change('value', update_scat_size_p2) slider_strip_alpha.on_change('value', update_scat_alpha_p2) # edit range slider slider_edit_zone.on_change('value', update_edit_range) # ------END Callback Actions------------------------------------- # ------START Initial Computations------------------------------- # Read skeleton dataset try: skel.data = pd.read_pickle('dill/skeleton.pkl') except OSError: # exit routine if baseline dataset not found print('skeleton.pkl not found, run make_skeleton.py?\n') print('\n >>> exiting routine.\n') sys.exit() # Read standalone dataset/assign baseline dataset try: ds_stand.data = pd.read_pickle('dill/standalone.pkl') if ed.sel_base == 'standalone': base_ds.data = ds_stand.data.copy() else: # set BASELINE dataset if something other than standalone try: base_ds.data = pd.read_pickle('dill/ds_' + ed.sel_base + '.pkl') except OSError: base_ds.data = ds_stand.data.copy() print('invalid "base_ds" name input?\n' + 'standalone set as base\n') except OSError: # exit routine if baseline dataset not found print('standalone.pkl or selected baseline dataset not found...\n' + 'run standalone.py?\n') print('\n >>> exiting routine.\n') sys.exit() # initial order and dataset generation find_order() calc_dataset() join_dataset() p1, p2 = make_plots(return_plots=True) manage_hover_tool() # --------END Initial Computations------------------------------- # --------START Widget Layout------------------------------------ # PANEL1 # squeeze tab items squeeze_widgets = column(row(sel_sqz_type, spacer_sqz_but2, sel_emp_grp, spacer_sqz_but3, sel_sqz_dir), row(slider_squeeze), row(but_1add, spacer_toggle_1, but_1sub, spacer_toggle_center1, but_squeeze, spacer_toggle_center2, but_0add, spacer_toggle_2, but_0sub)) # extra filters tab items filter_widgets = row(column(sel_filt1, sel_filt2, sel_filt3), column(sel_oper1, sel_oper2, sel_oper3), column(txt_input1, 
txt_input2, txt_input3)) # animate tab items anim_row1 = row(but_play, spacer_anim1, but_reset) anim_row2 = row(slider_animate) anim_row3 = row(but_back, spacer_anim2, but_fwd, spacer_anim_refresh, but_refresh) anim_col1 = column(anim_row1, anim_row2, anim_row3) # the commented items below are on hold for future development... # anim_col2 = column(chk_trails, sel_trails) # anim_items = row(anim_col1, spacer_anim, anim_col2) anim_items = row(anim_col1) # proposal_save save_buttons = column(spacer_top_save, row(but_save_edit), row(but_save_order)) save_dropdowns = column(sel_base, sel_cond, sel_proposal) save_widgets = row(save_buttons, spacer_middle_save, save_dropdowns) # make panels for main tab group panel1_tab1 = Panel(child=squeeze_widgets, title='squeeze') panel1_tab2 = Panel(child=filter_widgets, title='extra filters') panel1_tab3 = Panel(child=anim_items, title='animate') panel1_tab4 = Panel(child=save_widgets, title='proposal_save') # combine main panels into panel1 tab object panel1 = Tabs(tabs=[panel1_tab1, panel1_tab2, panel1_tab3, panel1_tab4], width=panel1_width, height=controls_height) # CENTER COLUMN buttons_and_attr_sel = column(spacer_top_center_col, sel_measure, but_plot, but_calc, height=controls_height, width=sel_width) # PANEL2 # display but_row1 = row(spacer_disp_mth1, sel_mth_oper, spacer_disp_mth2, sel_mth_num) but_row2 = row(spacer_disp_ax1, sel_ytype, spacer_disp_ax2, sel_xtype) but_col = column(spacer_top_disp, but_row1, but_row2) chk_col = column(chk_filter, chk_display) # display tab items display_widgets = row(chk_col, but_col) # size_alpha tab items szal_sliders = row(slider_list) sz_buttons = row(but_slider_sml, spacer_size_buts, but_slider_big) al_buttons = row(but_slider_adn, spacer_alpha_buts, but_slider_aup) szal_but_col = column(spacer_top_size_alpha, but_slider_reset, sz_buttons, al_buttons, width=120) szal_items = row(szal_sliders, szal_but_col) # grid_bg tab items gbg_col1 = column(sel_bgc, sel_gridc, height=chart_sel_height) gbg_col2 = column(sel_bgc_alpha, sel_gridc_alpha, height=chart_sel_height) gbg_col12 = row(gbg_col1, spacer_linesbg_col, gbg_col2, spacer_linesbg_col2) gbg_bottom_row = row(but_reset_colors, spacer_linesbg_bottom, chk_minor_grid, width=200) gbg_left = column(gbg_col12, gbg_bottom_row, width=300) gbg_col3 = column(spacer_top_color_apply, chk_color_apply, sel_box_line_width) gbg_items = row(gbg_left, gbg_col3) # hover tab items hover_row = row(chk_hover_on, chk_hover_sel) # make panels for aux tab group panel2_tab1 = Panel(child=display_widgets, title='display') panel2_tab2 = Panel(child=szal_items, title='size_alpha') panel2_tab3 = Panel(child=gbg_items, title='grid_bg') panel2_tab4 = Panel(child=hover_row, title='hover') panel2_tab5 = Panel(child=column(slider_strip_size, slider_strip_alpha), title='density') # combine aux panels into panel2 tab object panel2 = Tabs(tabs=[panel2_tab1, panel2_tab2, panel2_tab3, panel2_tab4, panel2_tab5], height=controls_height, width=panel2_width) # --------END Widget Layout-------------------------------------- # --------START Main Layout-------------------------------------- p1_row = row(p1) p2_row = row(p2) p1.add_layout(calc_note) p1.add_layout(plot_note) l_o = layout(row(panel1, spacer_controls1, buttons_and_attr_sel, spacer_controls2, panel2), row(spacer_edit, slider_edit_zone), p1_row, p2_row) doc.add_root(l_o) return doc
# --------END Main Layout----------------------------------------
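
# Standalone sketch of the exponential moving average smoothing used for
# the 'mean' display line (a copy of the ema() helper defined inside
# editor() above, shown here with hypothetical input for illustration
# only):
#
#     import numpy as np
#
#     def ema(arr, n):
#         """compute an n period exponential moving average"""
#         x = np.asarray(arr)
#         weights = np.exp(np.linspace(-1., 0., n))
#         weights /= weights.sum()
#         a = np.convolve(x, weights, mode='full')[:len(x)]
#         a[:n] = a[n]
#         return a
#
#     smoothed = ema(np.random.rand(100), 25)   # 25-period smoothing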
def color_list():
    '''provides a list of string color names for editor grid_bg tab
    color selectors
    '''
    colors = ['AliceBlue', 'AntiqueWhite', 'Aqua', 'Aquamarine', 'Azure',
              'Beige', 'Bisque', 'Black', 'BlanchedAlmond', 'Blue',
              'BlueViolet', 'Brown', 'BurlyWood', 'CadetBlue', 'Chartreuse',
              'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
              'Cyan', 'DarkBlue', 'DarkCyan', 'DarkGoldenRod', 'DarkGray',
              'DarkGrey', 'DarkGreen', 'DarkKhaki', 'DarkMagenta',
              'DarkOliveGreen', 'Darkorange', 'DarkOrchid', 'DarkRed',
              'DarkSalmon', 'DarkSeaGreen', 'DarkSlateBlue', 'DarkSlateGray',
              'DarkSlateGrey', 'DarkTurquoise', 'DarkViolet', 'DeepPink',
              'DeepSkyBlue', 'DimGray', 'DimGrey', 'DodgerBlue', 'FireBrick',
              'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro',
              'GhostWhite', 'Gold', 'GoldenRod', 'Gray', 'Grey', 'Green',
              'GreenYellow', 'HoneyDew', 'HotPink', 'IndianRed', 'Indigo',
              'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen',
              'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan',
              'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen',
              'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
              'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue',
              'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta',
              'Maroon', 'MediumAquaMarine', 'MediumBlue', 'MediumOrchid',
              'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue',
              'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed',
              'MidnightBlue', 'MintCream', 'MistyRose', 'Moccasin',
              'NavajoWhite', 'Navy', 'OldLace', 'Olive', 'OliveDrab',
              'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', 'PaleGreen',
              'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', 'PeachPuff',
              'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 'Red',
              'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Salmon',
              'SandyBrown', 'SeaGreen', 'SeaShell', 'Sienna', 'Silver',
              'SkyBlue', 'SlateBlue', 'SlateGray', 'SlateGrey', 'Snow',
              'SpringGreen', 'SteelBlue', 'Tan', 'Teal', 'Thistle',
              'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
              'WhiteSmoke', 'Yellow', 'YellowGreen']

    return colors
def alpha_list():
    '''provides a list of string decimals for editor grid_bg tab
    alpha selectors
    '''
    alphas = ['.00', '.01', '.02', '.03', '.04', '.05', '.06', '.07',
              '.08', '.09', '.10', '.11', '.12', '.13', '.14', '.15',
              '.16', '.17', '.18', '.19', '.20', '.21', '.22', '.23',
              '.24', '.25', '.26', '.27', '.28', '.29', '.30', '.31',
              '.32', '.33', '.34', '.35', '.36', '.37', '.38', '.39',
              '.40', '.41', '.42', '.43', '.44', '.45', '.46', '.47',
              '.48', '.49', '.50', '.51', '.52', '.53', '.54', '.55',
              '.56', '.57', '.58', '.59', '.60', '.61', '.62', '.63',
              '.64', '.66', '.68', '.70', '.72', '.75', '.77', '.80',
              '.82', '.85', '.87', '.90', '.92', '.95', '.97', '1.0']

    return alphas
[docs]def line_widths():
    '''provides a list of string decimals for editor grid_bg tab
    edit line width selector
    '''
    widths = ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8',
              '0.9', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5', '1.6',
              '1.7', '1.8', '1.9', '2.0']

    return widths
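Note that alpha_list() and line_widths() return strings, which is convenient for Select options but means the values are typically cast to float before being applied to plot properties. A hedged standalone sketch (the figure and widget names are illustrative, not taken from the editor code):

# sketch: applying the string-valued selector options as numeric properties
from bokeh.plotting import figure
from bokeh.models.widgets import Select

p = figure(plot_width=400, plot_height=300)
line_renderer = p.line([0, 1, 2], [0, 1, 4], line_width=1.0)

sel_alpha = Select(title='grid alpha', value='.20', options=alpha_list())
sel_width = Select(title='edit line width', value='1.0',
                   options=line_widths())

def alpha_change(attr, old, new):
    # Select values are strings such as '.20', so cast before assignment
    p.grid.grid_line_alpha = float(new)

def width_change(attr, old, new):
    line_renderer.glyph.line_width = float(new)

sel_alpha.on_change('value', alpha_change)
sel_width.on_change('value', width_change)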
[docs]def use_first_proposal_found(proposal_name):
    '''find and return the first list order found in
    'dill/proposal_names.pkl', along with its name.

    This function is used as a fallback when a proposal name requested
    elsewhere in the program does not exist.

    inputs
        proposal_name (string)
            the name of the proposal which was not found
    '''
    try:
        prop_names = \
            pd.read_pickle('dill/proposal_names.pkl').proposals.tolist()
        this_prop_name = prop_names[0]
        stored_case = pd.read_pickle('dill/case_dill.pkl').case.value
        print('\nerror : proposal name "' + str(proposal_name) +
              '" not found...\n')
        print('available proposal names are ', prop_names,
              'for case study:', stored_case)
        print('< using ' + this_prop_name + '>')
        return pd.read_pickle('dill/p_' + this_prop_name + '.pkl'), \
            this_prop_name
    except OSError:
        print('dill/proposal_names.pkl or dill/case_dill.pkl not found')
        print('\n >>> exiting routine.\n')
        sys.exit()
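A hedged illustration (the surrounding try/except is not quoted from the editor code) of how such a fallback might be triggered when a requested proposal has no pickled list order on disk:

# illustrative only: fall back to the first stored proposal when the
# requested one is missing (pd is the module-level pandas import)
requested = 'p3'          # hypothetical proposal name
try:
    df_order = pd.read_pickle('dill/p_' + requested + '.pkl')
except OSError:
    df_order, requested = use_first_proposal_found(requested)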
[docs]def make_dataset(proposal_name='', df_order=None, # list order conditions=[], ds=None, # skeleton input ds_stand=None): # used to calculate pre-implementation data pre, suf = 'dill/', '.pkl' order_name = 'p_' + proposal_name # dataset_name = 'ds_' + proposal_name order_file = (pre + order_name + suf) sdict = pd.read_pickle('dill/dict_settings.pkl') tdict = pd.read_pickle('dill/dict_job_tables.pkl') num_of_job_levels = sdict['num_of_job_levels'] lspcnt_calc = sdict['lspcnt_calc_on_remaining_population'] try: df_master = pd.read_pickle(pre + 'master' + suf) except OSError: print('Master list not found. Run build_program_files script?') sys.exit() # do not include inactive employees (other than furlough) in data model df_master = df_master[ (df_master.line == 1) | (df_master.fur == 1)].copy() # ORDER the skeleton df according to INTEGRATED list order. # df_skel can initially be in any integrated order, each employee # group must be in proper order relative to itself. # Use the short-form 'idx' (order) column from either the proposed # list or the new_order column from an edited list to create a new column, # 'new_order', within the long-form df_skel. The new order column # is created by data alignment using the common empkey indexes. # The skeleton may then be sorted by month and new_order. # (note: duplicate df_skel empkey index empkeys (from different months) # are assigned the same order value) if proposal_name == 'edit': df_new_order = pd.read_pickle(order_file) # if 'idx' in df_new_order.columns: # df_new_order.rename(columns={'idx': 'new_order'}, inplace=True) ds['new_order'] = df_new_order['new_order'] # dataset_file = (pre + 'ds_edit' + suf) else: ds_index = ds[ds.mnum == 0].index.values df_order_index = df_order.index.values # mask will remove any inactive employees existing # within the list df_order proposal mask = np.isin(df_order_index, ds_index) df_order = df_order[mask].copy() df_order_vals = df_order['idx'].values # assign back to df_order column to permit index data alignment... df_order['idx'] = st.rankdata(df_order_vals).astype(int) ds['new_order'] = df_order['idx'] # dataset_file = (pre + dataset_name + suf) # sort the skeleton by month and proposed list order ds.sort_values(['mnum', 'new_order'], inplace=True) # ORIG_JOB* eg_sequence = df_master.eg.values fur_sequence = df_master.fur.values # create list of employee group codes from the master data egs = sorted(pd.unique(eg_sequence)) # retrieve job counts array jcnts_arr = tdict['jcnts_arr'] if 'prex' in conditions: sg_rights = sdict['sg_rights'] sg_eg_list = [] sg_dict = od() stove_dict = od() # Find the employee groups which have pre-existing job rights... 
# grab the eg code from each sg (special group) job right description # and add to sg_eg_list for line_item in sg_rights: sg_eg_list.append(line_item[0]) # place unique eg codes into sorted list sg_eg_list = sorted(pd.unique(sg_eg_list)) # Make a dictionary containing the special group data for each # group with special rights for eg in sg_eg_list: sg_data = [] for line_item in sg_rights: if line_item[0] == eg: sg_data.append(line_item) sg_dict[eg] = sg_data for eg in egs: if eg in sg_eg_list: # (run prex stovepipe routine with eg dict key and value) sg = df_master[df_master.eg == eg]['sg'].values fur = df_master[df_master.eg == eg]['fur'] ojob_array = f.make_stovepipe_prex_shortform( jcnts_arr[0][eg - 1], sg, sg_dict[eg], fur) prex_stove = np.take(ojob_array, np.where(fur == 0)[0]) stove_dict[eg] = prex_stove else: # (run make_stovepipe routine with eg dict key and value) stove_dict[eg] = f.make_stovepipe_jobs_from_jobs_arr( jcnts_arr[0][eg - 1]) # use dict values as inputs to sp_arr, # ordered dict maintains proper sequence... sp_arr = list(np.array(list(stove_dict.values()))) # total of jobs per eg eg_job_counts = np.add.reduce(jcnts_arr[0], axis=1) orig_jobs = f.make_intgrtd_from_sep_stove_lists(sp_arr, eg_sequence, fur_sequence, eg_job_counts, num_of_job_levels) else: orig_jobs = f.make_original_jobs_from_counts( jcnts_arr[0], eg_sequence, fur_sequence, num_of_job_levels).astype(int) # insert stovepipe job result into new column of proposal (month_form) # this indexes the jobs with empkeys (orig_jobs is an ndarray only) df_master['orig_job'] = orig_jobs # ASSIGN JOBS - flush and no flush option* # cmonths - career length in months for each employee. # length is equal to number of employees cmonths = f.career_months(df_master, sdict['starting_date']) # nonret_each_month: count of non-retired employees remaining # in each month until no more remain - # length is equal to longest career length nonret_each_month = f.count_per_month(cmonths) all_months = np.sum(nonret_each_month) high_limits = nonret_each_month.cumsum() low_limits = f.make_lower_slice_limits(high_limits) # job_level_counts = np.array(jcnts_arr[1]) if sdict['delayed_implementation']: imp_month = sdict['imp_month'] imp_low = low_limits[imp_month] imp_high = high_limits[imp_month] # # read the standalone dataset (info is not in integrated order) # ds_stand = pd.read_pickle(stand_path_string) # get standalone data and order it the same as the integrated dataset. # create a unique key column in the standalone data df and a temporary # df which is ordered according to the integrated dataset imp_cols, arr_dict, col_array = \ f.make_preimp_array(ds_stand, ds, imp_high, sdict['compute_job_category_order'], sdict['compute_pay_measures']) # select columns to use as pre-implementation data for integrated # dataset data is limited to the pre-implementation months # aligned_jnums and aligned_fur arrays are the same as standalone data # up to the end of the implementation month, then the standalone value # for the implementation month is passed down unchanged for the # remainder of months in the model. These arrays carry over # standalone data for each employee group to be honored until and when # the integrated list is implemented. # These values from the standalone datasets (furlough status and # standalone job held at the implementation date) are needed for # subsequent integrated dataset job assignment calculations. 
Other # standalone values are simply copied and inserted into the # pre-implementation months of the integrated dataset. delayed_jnums = col_array[arr_dict['jnum']] delayed_fur = col_array[arr_dict['fur']] aligned_jnums = f.align_fill_down(imp_low, imp_high, ds[[]], # indexed with empkeys delayed_jnums) aligned_fur = f.align_fill_down(imp_low, imp_high, ds[[]], delayed_fur) # now assign "filled-down" job numbers to numpy array delayed_jnums[imp_low:] = aligned_jnums[imp_low:] delayed_fur[imp_low:] = aligned_fur[imp_low:] # ORIG_JOB and FUR (delayed implementation) # then assign numpy array values to orig_job column of integrated # dataset as starting point for integrated job assignments ds['orig_job'] = delayed_jnums ds['fur'] = delayed_fur if sdict['integrated_counts_preimp']: # assign combined job counts prior to the implementation date. # (otherwise, separate employee group counts will be used when # data is transferred from col_array at end of script) # NOTE: this data is the actual number of jobs held within each # category; could be less than the number of jobs available as # attrition occurs standalone_preimp_job_counts = \ f.make_delayed_job_counts(imp_month, delayed_jnums, low_limits, high_limits) col_array[arr_dict['job_count']][:imp_high] = \ standalone_preimp_job_counts else: # set implementation month at zero for job assignment routine imp_month = 0 # ORIG_JOB and FUR (no delayed implementation) # transfer proposal stovepipe jobs (month_form) to long_form via index # (empkey) alignment... ds['orig_job'] = df_master['orig_job'] # developer note: test to verify this is not instantiated elsewhere... ds['fur'] = df_master['fur'] table = tdict['table'] j_changes = tdict['j_changes'] reduction_months = f.get_job_reduction_months(j_changes) # copy selected columns from ds for job assignment function input below. # note: if delayed implementation, the 'fur' and 'orig_job' columns # contain standalone data through the implementation month. df_align = ds[['eg', 'sg', 'fur', 'orig_job']].copy() # JNUM, FUR, JOB_COUNT if sdict['no_bump']: # No bump, no flush option (includes conditions, furlough/recall, # job changes schedules) # this is the main job assignment function. It loops through all of # the months in the model and assigns jobs nbnf, job_count, fur = \ f.assign_jobs_nbnf_job_changes(df_align, low_limits, high_limits, all_months, reduction_months, imp_month, conditions, sdict, tdict, fur_return=sdict['recall']) ds['jnum'] = nbnf ds['job_count'] = job_count ds['fur'] = fur # for create_snum_and_spcnt_arrays function input... jnum_jobs = nbnf else: # Full flush and bump option (no conditions or # furlough/recall schedulue considered, job changes are included) # No bump, no flush applied up to implementation date fbff, job_count, fur = f.assign_jobs_full_flush_job_changes( nonret_each_month, table[0], num_of_job_levels) ds['jnum'] = fbff ds['job_count'] = job_count ds['fur'] = fur # for create_snum_and_spcnt_arrays function input... 
jnum_jobs = fbff # SNUM, SPCNT, LNUM, LSPCNT monthly_job_counts = table[1] ds['snum'], ds['spcnt'], ds['lnum'], ds['lspcnt'] = \ f.create_snum_and_spcnt_arrays(jnum_jobs, num_of_job_levels, nonret_each_month, monthly_job_counts, lspcnt_calc) # RANK in JOB ds['rank_in_job'] = ds.groupby(['mnum', 'jnum'], sort=False).cumcount() + 1 # JOBP jpcnt = (ds.rank_in_job / ds.job_count).values np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999) ds['jobp'] = ds['jnum'] + jpcnt # PAY - merge with pay table - provides monthly pay if sdict['compute_pay_measures']: # account for furlough time (only count active months) if sdict['discount_longev_for_fur']: # skel(ds) provides pre-calculated non-discounted scale data # flip ones and zeros... ds['non_fur'] = 1 - ds.fur.values non_fur = ds.groupby([pd.Grouper('empkey')])['non_fur'] \ .cumsum().values ds.pop('non_fur') starting_mlong = ds.s_lmonths.values cum_active_months = non_fur + starting_mlong ds['mlong'] = cum_active_months ds['ylong'] = ds['mlong'].values / 12 ds['scale'] = np.clip((cum_active_months / 12) + 1, 1, sdict['top_of_scale']).astype(int) # make a new long_form dataframe and assign a combination of # pay-related ds columns from large dataset as its index... # the dataframe is empty - we are only making an index-alignment # vehicle to use with indexed pay table.... # the dataframe index contains specific scale, job, and contract year # for each line in long_form ds df_pt_index = pd.DataFrame(index=((ds['scale'].values * 100) + ds['jnum'].values + (ds['year'].values * 100000))) if sdict['enhanced_jobs']: df_pt = pd.read_pickle('dill/pay_table_enhanced.pkl') else: df_pt = pd.read_pickle('dill/pay_table_basic.pkl') # 'data-align' small indexed pay_table to long_form df: df_pt_index['monthly'] = df_pt['monthly'] ds['monthly'] = df_pt_index.monthly.values # MPAY # adjust monthly pay for any raise and last month pay percent if # applicable ds['mpay'] = ((ds['pay_raise'].values * ds['mth_pcnt'].values * ds['monthly'].values)) / 1000 ds.pop('monthly') # CPAY ds['cpay'] = ds.groupby('new_order')['mpay'].cumsum() if sdict['delayed_implementation']: ds_cols = ds.columns # grab each imp_col (column to insert standalone or pre-implementation # date data) and replace integrated data up through implementation # date for col in imp_cols: if col in ds_cols: arr = ds[col].values arr[:imp_high] = col_array[arr_dict[col]][:imp_high] ds[col] = arr # CAT_ORDER # global job ranking if sdict['compute_job_category_order']: ds['cat_order'] = f.make_cat_order(ds, table[0]) return(ds)
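The "data alignment" comments throughout make_dataset rely on a core pandas behavior: assigning a Series to a DataFrame column aligns on index labels, here the empkey index shared by the short-form proposal and every month of the long-form skeleton, so duplicate empkey rows from different months all receive the same order value. A minimal toy sketch of the 'new_order' assignment and the mnum/new_order sort (toy data, not the production tables):

import pandas as pd
import scipy.stats as st

# toy long-form skeleton: two employees (empkey index) over two months
ds = pd.DataFrame({'mnum': [0, 0, 1, 1]},
                  index=pd.Index([1001, 1002, 1001, 1002], name='empkey'))

# toy short-form proposal order (gaps are possible in practice)
df_order = pd.DataFrame({'idx': [5, 2]},
                        index=pd.Index([1001, 1002], name='empkey'))

# re-rank to a dense 1..n ordering, as done above with st.rankdata
df_order['idx'] = st.rankdata(df_order['idx'].values).astype(int)

# index alignment: every skeleton row for an empkey, in every month,
# receives that empkey's order value
ds['new_order'] = df_order['idx']

# sort by month, then by proposed list order
ds.sort_values(['mnum', 'new_order'], inplace=True)
print(ds)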