Keith_Edmonds_vote_sim/utils.py at master · endolith/Keith_Edmonds_vote_sim · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import pandas as pd
import numpy as np


def get_winners(S_in, Selection='Utilitarian', Reweight='Cap Score', KP_Transform=False, W=5, K=5):
    """
    Turn scores into a winner set for various systems

    Winners are chosen sequentially. Each round first picks one candidate
    according to `Selection`, then updates the per-ballot weights and/or the
    working scores according to `Reweight` before the next round starts.

    Parameters
    ----------
    S_in : pandas.DataFrame
        Table of scores given to each candidate by each voter
        (one row per ballot, one column per candidate).
    Selection : {'Utilitarian', 'STAR', 'Hare_Ballots'}, optional
        How the winner of each round is picked. Default is 'Utilitarian'.

        * 'Utilitarian' : candidate with the largest sum of working scores.
        * 'STAR' : the candidates with the two largest working-score sums
          (ties kept) are compared, and the one that most often holds a
          ballot's highest score among those finalists wins.
        * 'Hare_Ballots' : for each candidate, ballots are ordered from
          highest to lowest original score and the weighted scores are summed
          for as long as the accumulated ballot weight stays below V/W; the
          candidate with the largest such sum wins.

        There is no fallback branch: any other value leaves the round's
        winner unassigned, so the call fails on the first round.
    Reweight : {'Cap Score', 'Scale Score', 'Jefferson', 'Webster', 'Allocate', 'Allocate Current'}, optional
        How ballots are reweighted after each winner. Default is 'Cap Score'.
        Any other value performs no reweighting at all.
    KP_Transform : bool, optional
        If True, every ballot is replaced by K approval ballots, one per
        threshold in ``range(K)``, each approving the candidates scored above
        that threshold. If False, scores are simply divided by K.
        Default is False.
    W : int, optional
        Number of winners to select. Default is 5.
    K : int, optional
        Maximum possible score. Default is 5.

    Returns
    -------
    list
        Column labels of the selected candidates, in order of selection.
        Winners are never removed from the working scores, so the same label
        can appear more than once.
    """
    # To ensure float division in Python 2?
    W = float(W)

    # Create the working set of scores
    if KP_Transform:
        # The KP transform changes each voter into a set of K approval voters
        # (the K groups are stacked row-wise, so the result has K times as
        # many rows as S_in)
        groups = []
        for threshold in range(K):
            groups.append(np.where(S_in.values > threshold, 1, 0))
        S_wrk = pd.DataFrame(np.concatenate(groups), columns=S_in.columns)
    else:
        # Normalise by the maximum score K
        S_wrk = pd.DataFrame(S_in.values/K, columns=S_in.columns)

    # Number of (possibly KP-expanded) ballots
    V = S_wrk.shape[0]

    # Keep an untouched copy of the working scores; several reweighting
    # methods rebuild S_wrk from it every round
    S_orig = S_wrk.copy()

    # Every ballot starts with full weight. These only matter for specific
    # systems and are initialized here
    ballot_weight = pd.Series(np.ones(V),name='ballot_weight')


    # Populate winners in a loop
    winner_list = []
    while len(winner_list) < W:
        #round
        #R = len(winner_list)

        #select winner
        #w = index with highest selection metric
        if Selection == 'STAR':
            #find top two (keep='all' keeps every candidate tied at the cut)
            df_tops = S_wrk[S_wrk.sum().nlargest(2, keep='all').index]
            #Run off winner: count, per finalist, the ballots on which that
            #finalist holds the row maximum, and take the largest count
            w = df_tops.eq(df_tops.max(1), axis=0).sum().idxmax()
        elif Selection == 'Hare_Ballots':
            # Find candidate with the highest vote sum in a hare quota of ballot weights

            # Sort each candidate by scores, from highest to lowest
            # (uses the original scores, not the reweighted working scores)
            sort_idx = np.argsort(-S_orig.values, axis=0)

            # Collect ballot weights in same sorted order
            weights = ballot_weight.values[sort_idx]

            # Accumulate weights for each candidate
            sums = np.cumsum(weights, axis=0)

            # Accumulated weights under threshold
            thres = (sums < V/W)

            #Weight Scores
            weighted_scores = S_orig.mul(ballot_weight, axis = 0)

            # Sum scores for candidates under threshold
            c_score = np.sum(thres * np.take_along_axis(weighted_scores.values, sort_idx, axis=0),axis=0)
            w = S_orig.columns[np.argmax(c_score)]
        elif Selection == 'Utilitarian':
            w = S_wrk.sum().idxmax()

        winner_list.append(w)

        #Reweight the working scores
        if Reweight == 'Cap Score':
            # Greater than 1 only when the winner's working-score total
            # exceeds V/W; in that case the spent score is scaled down
            surplus_factor = max( S_wrk[w].sum() *W/V , 1.0)

            #Score spent on each winner by each voter
            score_spent = S_wrk[w]/ surplus_factor
            # print('Score Spent ',score_spent.sum())

            #Total score left to be spent by each voter
            ballot_weight = (ballot_weight-score_spent).clip(0.0,1.0)

            #Update Ballots
            #The winner's scores are not zeroed here (see the commented-out
            #line below), so nothing in this branch explicitly prevents a
            #repeat win
            #S_wrk[w]=0
            #Take score off of ballot (ie reweight): every working score is
            #capped at the weight its ballot has left
            mins = np.minimum(S_wrk.values,ballot_weight.values[:, np.newaxis])
            S_wrk = pd.DataFrame(mins, columns = S_wrk.columns)

        elif Reweight == 'Scale Score':

            #Check for Surplus (same factor as in 'Cap Score')
            surplus_factor = max( S_wrk[w].sum() *W/V , 1.0)

            score_spent = S_wrk[w]/ surplus_factor

            #Total score left to be spent by each voter
            ballot_weight = (ballot_weight-score_spent).clip(0.0,1.0)

            #Working scores are the original scores scaled by remaining weight
            S_wrk = S_orig.mul(ballot_weight, axis = 0)
        elif Reweight == 'Jefferson':
            #Per-ballot sum of original scores given to all winners so far
            total_sum =  S_orig[winner_list].sum(axis=1)
            #Ballot weight as defined by the Jefferson method
            ballot_weight = 1/(total_sum + 1)
            S_wrk = S_orig.mul(ballot_weight, axis = 0)

        elif Reweight == 'Webster':
            #Per-ballot sum of original scores given to all winners so far
            total_sum =  S_orig[winner_list].sum(axis=1)
            #Ballot weight as defined by the Webster method
            ballot_weight = 1/(2*total_sum + 1)
            S_wrk = S_orig.mul(ballot_weight, axis = 0)

        elif Reweight == 'Allocate':
            # NOTE(review): the quota used for the surplus calculation is
            # rounded, while the threshold below uses the unrounded V/W --
            # confirm this difference is intended
            quota = round(V/W)

            #Pair each ballot's weight with its original score for the winner
            cand_df = pd.concat([ballot_weight,S_orig[w]], axis=1).copy()
            cand_df_sort = cand_df.sort_values(by=[w], ascending=False)

            #find the score where everybody above is allocated: the score of
            #the last ballot (in descending score order) whose cumulative
            #weight is still below V/W
            # NOTE(review): .iloc[-1] raises IndexError when no ballot
            # satisfies the cumulative-weight condition
            split_point = cand_df_sort[cand_df_sort['ballot_weight'].cumsum() < V/W][w].iloc[-1]

            #the surplus handling below only runs for a positive split score
            if split_point>0:
                #Amount of ballot for voters who voted on the split point
                voters_on_split = cand_df[cand_df[w] == split_point]['ballot_weight'].sum()

                #Amount of ballot for voters who voted more than the split point
                voters_allocated = cand_df[cand_df[w] > split_point]['ballot_weight'].sum()

                #amount to reweight the voters on the split by (ie surplus handling)
                reweighted_value = 1 - (quota - voters_allocated)/voters_on_split

                #reweight voters on split
                cand_df.loc[cand_df[w] == split_point, 'ballot_weight'] = cand_df.loc[cand_df[w] == split_point, 'ballot_weight'] * reweighted_value

            #exhaust ballots for those above split
            cand_df.loc[cand_df[w] >split_point, 'ballot_weight'] = 0

            #update
            ballot_weight = cand_df['ballot_weight']
            S_wrk = S_orig.mul(ballot_weight, axis = 0)

        elif Reweight == 'Allocate Current':
            # NOTE(review): as in 'Allocate', the rounded quota is mixed with
            # the unrounded V/W threshold -- confirm this is intended
            quota = round(V/W)
            #Create frames for manipulation; unlike 'Allocate', ballots are
            #ranked by their current working score for the winner
            cand_df = pd.concat([ballot_weight,S_wrk[w]], axis=1).copy()
            cand_df_sort = cand_df.sort_values(by=[w], ascending=False).copy()

            #find the score where a quota is filled: the lowest score among
            #the ballots whose cumulative weight is still below V/W
            split_point = cand_df_sort[cand_df_sort['ballot_weight'].cumsum() < V/W][w].min()
            #print('split_point',split_point*5)

            #Amount of ballot for voters who voted more than the split point
            spent_above = cand_df[cand_df[w] > split_point]['ballot_weight'].sum()
            #print('spent_above',spent_above)

            #Exhaust all ballots above split point
            if spent_above>0:
                cand_df.loc[cand_df[w] > split_point, 'ballot_weight'] = 0.0

            #if split point = 0 then the winner did not get a full quota of support
            #otherwise there is a surplus

            #Amount of ballot for voters who gave a score on the split point
            weight_on_split = cand_df[cand_df[w] == split_point]['ballot_weight'].sum()
            #print('weight_on_split',weight_on_split)

            if weight_on_split>0:
                #Fraction of ballot on split needed to be spent
                spent_value = (quota - spent_above)/weight_on_split

                #Take the spent value from the voters on the threshold evenly
                cand_df.loc[cand_df[w] == split_point, 'ballot_weight'] = cand_df.loc[cand_df[w] == split_point, 'ballot_weight'] * (1 - spent_value)


            #print('Fraction of quota spent ', (ballot_weight.sum() - cand_df['ballot_weight'].sum())/quota)
            #ballot_weight = cand_df['ballot_weight'].clip(0.0,1.0)
            #The updated weights are used as-is (the clipped variant above is
            #disabled)
            ballot_weight = cand_df['ballot_weight']
            S_wrk = S_orig.mul(ballot_weight, axis = 0)

    return winner_list

#Method to get all output quality metrics for a winner set

def get_metrics(S_in, metrics, winner_list, method, K=5):
    """
    Compute all output quality metrics for one winner set.

    Parameters
    ----------
    S_in : pandas.DataFrame
        Table of scores given to each candidate by each voter
        (one row per voter, one column per candidate).
    metrics : dict or None
        Mapping of metric name -> {method name -> value} returned by a
        previous call. If it is falsy (``None`` or empty), a fresh mapping
        containing every metric name is created.
    winner_list : list
        Column labels of the winners, e.g. as returned by `get_winners`.
    method : hashable
        Key under which this winner set's values are stored in each metric.
    K : int, optional
        Maximum possible score, used to normalise `S_in`. Default is 5.

    Returns
    -------
    dict
        The metrics mapping with the values for `method` filled in. When a
        non-empty `metrics` is passed, that same object is updated in place
        and returned.
    """
    metric_names = (
        'average_utility',
        'average_ln_utility',
        'average_favored_winner_utility',
        'average_unsatisfied_utility',
        'fully_satisfied_voters',
        'totally_unsatisfied_voters',
        'harmonic_quality',
        'unitary_quality',
        'ebert_cost',
        'most_blocking_loser_capture',
        'largest_total_unsatisfied_group',
        'average_utility_gain_from_extra_winner',
        'utility_deviation',
        'score_deviation',
        'favored_winner_deviation',
        'number_of_duplicates',
        'average_winner_polarization',
        'most_polarized_winner',
        'least_polarized_winner',
    )

    # Store metrics for each method; start a fresh table when none is given
    if not metrics:
        metrics = {name: {} for name in metric_names}

    S_norm = S_in.divide(K)
    S_winners = S_norm[winner_list]
    V = S_norm.shape[0]
    W = len(winner_list)

    # Quantities shared by several metrics
    utility = S_winners.sum(axis=1)        # per-voter total score for winners
    favored = S_winners.max(axis=1)        # per-voter best winner score
    winner_totals = S_winners.sum()        # per-winner total score
    polarization = S_winners.std(axis=0)   # per-winner score spread

    # Utility Metrics
    metrics['average_utility'][method] = utility.sum() / V
    metrics['average_ln_utility'][method] = np.log1p(utility).sum() / V
    metrics['average_favored_winner_utility'][method] = favored.sum() / V
    metrics['average_unsatisfied_utility'][method] = sum([1 - u for u in utility if u < 1]) / V
    metrics['fully_satisfied_voters'][method] = sum([(u >= 1) for u in utility]) / V
    metrics['totally_unsatisfied_voters'][method] = sum([(u == 0) for u in utility]) / V

    # Representation Metrics
    # Harmonic quality: each voter's winner scores are ranked from highest to
    # lowest and the r-th best score is divided by r. (np.argsort must not be
    # used for the divisors: it returns the sorting permutation, not the rank
    # of each element.)
    ranked_scores = -np.sort(-S_winners.values, axis=1)
    rank_divisors = np.arange(1, ranked_scores.shape[1] + 1)
    metrics['harmonic_quality'][method] = np.divide(ranked_scores, rank_divisors).sum() / V
    metrics['unitary_quality'][method] = S_winners.divide((winner_totals * W / V).clip(lower=1)).sum(axis=1).clip(upper=1).sum() / V
    metrics['ebert_cost'][method] = (S_winners.divide(winner_totals * W / V).sum(axis=1)**2).sum() / V
    # Largest share of voters who score some single candidate above their
    # total score for the winner set
    metrics['most_blocking_loser_capture'][method] = S_norm.gt(utility, axis=0).sum().max() / V
    # Among voters with zero total for the winners, the largest share giving
    # a non-zero score to one single candidate
    metrics['largest_total_unsatisfied_group'][method] = S_norm[utility == 0].astype(bool).sum(axis=0).max() / V
    metrics['average_utility_gain_from_extra_winner'][method] = S_norm.sub(utility, axis=0).clip(lower=0).sum(axis=0).max() / V

    # Equity Metrics
    metrics['utility_deviation'][method] = utility.std()
    metrics['score_deviation'][method] = S_winners.values.flatten().std()
    metrics['favored_winner_deviation'][method] = favored.std()
    metrics['number_of_duplicates'][method] = len(winner_list) - len(set(winner_list))
    metrics['average_winner_polarization'][method] = polarization.mean()
    metrics['most_polarized_winner'][method] = polarization.max()
    metrics['least_polarized_winner'][method] = polarization.min()

    return metrics


def plot_metric(df, Methods, axis, is_int=True):
    """
    Draw one step histogram per column of `df` on `axis`.

    Each column is annotated with a text box showing its mean and standard
    deviation. The colour is chosen from the column's reweighting method
    (with '_KP' appended when the KP transform was used) and the line style
    from its selection method.

    Parameters
    ----------
    df : pandas.DataFrame
        One column of metric values per method.
    Methods : dict
        Maps each column label of `df` to a dict with the keys 'Reweight',
        'KP_Transform' and 'Selection'.
    axis : object
        Plot target providing ``hist``, ``text`` and ``transAxes``
        (presumably a matplotlib Axes -- confirm against the caller).
    is_int : bool, optional
        If True the mean and deviation are printed with no decimals,
        otherwise with three. Default is True.

    Returns
    -------
    object
        The `axis` that was passed in.

    Raises
    ------
    KeyError
        If a reweight/KP combination or a selection method has no entry in
        the colour or line-style tables below.
    """
    colors = {'Jefferson': '#FF7F00', 'Webster': 'b', 'Allocate': 'r', 'Cap Score': 'k',
              'Allocate Current': '#FFFF00', 'Scale Score': 'm', 'Allocate_KP': 'g', 'Cap Score_KP': '#808080'}
    styles = {'Utilitarian': 'solid', 'STAR': 'dashed', 'Hare_Ballots': 'dotted'}

    # Backslashes are escaped explicitly: '\m' and '\s' are invalid escape
    # sequences, while the '\n' in the middle must remain a real newline.
    if is_int:
        text_format = '$\\mu=%.0f$\n$\\sigma=%.0f$'
    else:
        text_format = '$\\mu=%.3f$\n$\\sigma=%.3f$'

    # Common bin edges spanning the full value range of all columns
    bins = np.linspace(df.min().min(), df.max().max())
    for i, col in enumerate(df.columns):
        reweight = Methods[col]['Reweight']
        if Methods[col]['KP_Transform']:
            reweight = reweight + '_KP'
        selection = Methods[col]['Selection']

        # The bin edges returned by hist are reused for the next column
        _, bins, _ = axis.hist(list(df[col]), bins=bins, color=colors[reweight],
                               linestyle=styles[selection], histtype='step', label=col)

        textstr = text_format % (df[col].mean(), df[col].std())
        props = dict(boxstyle='round', facecolor='white', ec=colors[reweight],
                     linestyle=styles[selection], alpha=0.8)
        # Stack the summary boxes downwards from the top-right corner
        axis.text(0.98, 0.95 - 0.1*i, textstr, transform=axis.transAxes, bbox=props,
                  verticalalignment='top', horizontalalignment='right')
    return axis