My toolkit of choice for plotting or graphing data happens to be the combination of numpy, matplotlib, and scipy. I immediately found the function I was looking for: matplotlib.pyplot.boxplot. Unfortunately, I wanted the box plot to show information about the 99th percentile, and woe, this function will only draw whiskers based on the IQR.
So I wrote my own box plot implementation, one that's a little more generic and, most importantly, meets my needs.
# @author: Aaron Blankstein
from scipy.stats import scoreatpercentile


class boxplotter(object):
    """Draw a single box-and-whisker glyph from precomputed summary values.

    The instance only stores the y-values it is given; it does not compute
    any statistics itself.

    Attributes:
        median: y-value of the median line.
        top: y-value of the upper edge of the box.
        bott: y-value of the lower edge of the box.
        whisk_top: y-value of the upper whisker cap, or None for no whisker.
        whisk_bott: y-value of the lower whisker cap, or None for no whisker.
    """

    def __init__(self, median, top, bottom, whisk_top=None, whisk_bottom=None):
        self.median = median
        self.top = top
        self.bott = bottom
        self.whisk_top = whisk_top
        self.whisk_bott = whisk_bottom

    def draw_on(self, ax, index, box_color="blue", median_color="red",
                whisker_color="black", width=0.7):
        """Render the glyph on ``ax``, horizontally centred on ``index``.

        Everything is drawn with ``ax.broken_barh``; the median, the whisker
        caps and the whisker stems are bars with zero height or zero width,
        so only their edge is visible.

        Args:
            ax: object providing a matplotlib-style ``broken_barh`` method.
            index: x position of the centre of the box.
            box_color: edge colour of the box and of the dashed whisker stems.
            median_color: edge colour of the median line.
            whisker_color: edge colour of the whisker caps.
            width: horizontal width of the box, median line and whisker caps.
        """
        half = width / 2
        span = [(index - half, width)]  # x-extent shared by box, median, caps
        stem = [(index, 0)]             # zero-width bar: a vertical line

        ax.broken_barh(span, (self.bott, self.top - self.bott),
                       facecolor="white", edgecolor=box_color)
        ax.broken_barh(span, (self.median, 0),
                       facecolor="white", edgecolor=median_color)

        # Each whisker is a cap at the whisker value plus a dashed stem that
        # runs from the cap to the adjacent edge of the box.
        whiskers = ((self.whisk_top, self.top), (self.whisk_bott, self.bott))
        for cap, box_edge in whiskers:
            if cap is None:
                continue
            ax.broken_barh(span, (cap, 0),
                           facecolor="white", edgecolor=whisker_color)
            ax.broken_barh(stem, (cap, box_edge - cap),
                           edgecolor=box_color, linestyle="dashed")


def percentile_box_plot(ax, data, indexer=None, box_top=75, box_bottom=25,
                        whisker_top=99, whisker_bottom=1, box_color="blue",
                        median_color="red", whisker_color="black", width=0.7):
    """Draw one percentile-based box plot per series in ``data``.

    Args:
        ax: axes-like object passed through to ``boxplotter.draw_on``.
        data: iterable of series; each series is handed to
            ``scipy.stats.scoreatpercentile``.
        indexer: optional callable mapping a series to its x position. When
            omitted, the series are placed at 1, 2, 3, ...
        box_top: percentile used for the upper edge of the box.
        box_bottom: percentile used for the lower edge of the box.
        whisker_top: percentile of the upper whisker, or None to omit it.
        whisker_bottom: percentile of the lower whisker, or None to omit it.
        box_color: forwarded to ``draw_on``.
        median_color: forwarded to ``draw_on``.
        whisker_color: forwarded to ``draw_on``.
        width: forwarded to ``draw_on``.
    """
    if indexer is None:
        indexed_data = enumerate(data, 1)
    else:
        indexed_data = [(indexer(datum), datum) for datum in data]

    def get_whisk(vector, w):
        # A None percentile means "do not draw this whisker".
        if w is None:
            return None
        return scoreatpercentile(vector, w)

    for index, series in indexed_data:
        glyph = boxplotter(
            scoreatpercentile(series, 50),
            scoreatpercentile(series, box_top),
            scoreatpercentile(series, box_bottom),
            get_whisk(series, whisker_top),
            get_whisk(series, whisker_bottom),
        )
        glyph.draw_on(ax, index, box_color=box_color,
                      median_color=median_color,
                      whisker_color=whisker_color, width=width)


def example():
    """Plot three random data sets and save the figure as ``example.png``."""
    from pylab import rand, ones, concatenate
    import matplotlib.pyplot as plt

    # EXAMPLE data code from:
    # http://matplotlib.sourceforge.net/pyplots/boxplot_demo.py

    # fake up some data
    spread = rand(50) * 100
    center = ones(25) * 50
    flier_high = rand(10) * 100 + 100
    flier_low = rand(10) * -100
    data = concatenate((spread, center, flier_high, flier_low), 0)

    # fake up some more data
    spread = rand(50) * 100
    center = ones(25) * 40
    flier_high = rand(10) * 100 + 100
    flier_low = rand(10) * -100
    d2 = concatenate((spread, center, flier_high, flier_low), 0)

    # Column vectors, plus a 1-D slice holding every other value of d2.
    data = data.reshape(-1, 1)
    d2 = d2.reshape(-1, 1)
    data = [data, d2, d2[::2, 0]]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlim(0, 4)
    percentile_box_plot(ax, data)
    plt.savefig('example.png')


if __name__ == "__main__":
    example()
The example() function produced the lovely box plot above. If you pass None as the percentile for either whisker, that whisker won't be drawn. Anyway, happy plotting.