Package astLib :: Module astStats
[hide private]
[frames] | [no frames]

Source Code for Module astLib.astStats

  1  # -*- coding: utf-8 -*- 
  2  """module for performing statistical calculations. 
  3   
  4  (c) 2007-2011 Matt Hilton  
  5   
  6  U{http://astlib.sourceforge.net} 
  7   
  8  This module (as you may notice) provides very few statistical routines. It does, however, provide 
  9  biweight (robust) estimators of location and scale, as described in Beers et al. 1990 (AJ, 100, 
 10  32), in addition to a robust least squares fitting routine that uses the biweight transform. 
 11   
 12  Some routines may fail if they are passed lists with few items and encounter a `divide by zero' 
 13  error. Where this occurs, the function will return None. An error message will be printed to the 
 14  console when this happens if astStats.REPORT_ERRORS=True (the default). Testing if an 
 15  astStats function returns None can be used to handle errors in scripts.  
 16   
 17  For extensive statistics modules, the Python bindings for GNU R (U{http://rpy.sourceforge.net}), or 
 18  SciPy (U{http://www.scipy.org}) are suggested. 
 19   
 20  """ 
 21   
 22  import math 
 23  import numpy 
 24  import sys 
 25   
 26  REPORT_ERRORS=True 
 27   
 28  #--------------------------------------------------------------------------------------------------- 
def mean(dataList):
    """Calculates the arithmetic mean of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: mean average (0 is returned for an empty list)

    """
    total = 0
    for value in dataList:
        total += float(value)

    count = len(dataList)
    if count == 0:
        return 0
    return total / float(count)
46 47 #---------------------------------------------------------------------------------------------------
def weightedMean(dataList):
    """Calculates the weighted mean of a two dimensional list of (value, weight) pairs.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: sum of value*weight divided by the sum of the weights (0 is returned for an
    empty list)

    """
    weightedTotal = 0
    totalWeight = 0
    for pair in dataList:
        weightedTotal += float(pair[0] * pair[1])
        totalWeight += pair[1]

    if len(dataList) == 0:
        return 0
    return weightedTotal / totalWeight
68 69 #---------------------------------------------------------------------------------------------------
def stdev(dataList):
    """Calculates the (sample) standard deviation of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: standard deviation, using the n-1 denominator; 0 is returned when the list
    holds fewer than two items

    """
    count = len(dataList)

    # With fewer than two items the n-1 denominator is zero (or negative), so there is
    # no sample deviation to compute; report 0 as is already done for an empty list.
    if count < 2:
        return 0

    listMean = mean(dataList)
    sumSq = 0
    for item in dataList:
        deviation = float(item - listMean)
        sumSq += deviation * deviation

    return math.sqrt(sumSq / (float(count) - 1))
88 89 #---------------------------------------------------------------------------------------------------
def rms(dataList):
    """Calculates the root mean square of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: square root of the mean of the squared items

    """
    squares = [value * value for value in dataList]
    return math.sqrt(mean(squares))
106 107 #---------------------------------------------------------------------------------------------------
def weightedStdev(dataList):
    """Calculates the weighted (sample) standard deviation of a list of numbers.

    Items whose second column is not greater than zero are skipped when accumulating the
    sums.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: weighted standard deviation

    @note: Returns None if dataList holds fewer than two items, or if no item has a
    positive weight.

    """
    count = len(dataList)
    if count < 2:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : dataList contains < 2 items.""")
        return None

    listMean = weightedMean(dataList)

    # NOTE(review): each deviation is divided by the second column here, whereas
    # weightedMean() multiplies by it -- this looks like the column is being treated as
    # an uncertainty rather than a weight; confirm the intended convention.
    sumSq = 0
    wSum = 0
    for item in dataList:
        if item[1] > 0.0:
            scaled = float((item[0] - listMean) / item[1])
            inverse = float(1.0 / item[1])
            sumSq += scaled * scaled
            wSum += inverse * inverse

    # Nothing was accumulated, so the normalisation below would divide by zero.
    if wSum == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : divide by zero error.""")
        return None

    nFactor = float(count) / float(count - 1)
    return math.sqrt(nFactor * (sumSq / wSum))
136 137 #---------------------------------------------------------------------------------------------------
def median(dataList):
    """Calculates the median of a list of numbers.

    The input is not modified; a sorted copy is used internally.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: median average: the middle item for an odd number of items, or the mean of
    the two middle items for an even number of items

    @raise IndexError: if dataList is empty

    """
    ordered = sorted(dataList)
    count = len(ordered)
    if count == 0:
        raise IndexError("astStats.median() : dataList is empty")

    middle = count // 2
    if count % 2 == 1:
        # Odd number of items: the middle one is at index floor(n/2).
        return ordered[middle]

    # Even number of items: average the two items either side of the midpoint.
    return (ordered[middle] + ordered[middle - 1]) / 2.0
166 167 #---------------------------------------------------------------------------------------------------
def modeEstimate(dataList):
    """Estimates the mode of a set of values using mode=(3*median)-(2*mean).

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: estimate of mode average

    """
    # The median is evaluated before the mean, as in the original expression.
    medianTerm = 3 * median(dataList)
    meanTerm = 2 * mean(dataList)
    return medianTerm - meanTerm
180 181 #---------------------------------------------------------------------------------------------------
def MAD(dataList):
    """Calculates the Median Absolute Deviation of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: median absolute deviation, i.e. the median of the |x - median(x)| values

    """
    listMedian = median(dataList)

    # Sorted |x-M| values
    diffModuli = sorted([math.fabs(item - listMedian) for item in dataList])

    count = len(diffModuli)
    middle = count // 2
    if count % 2 == 1:
        # Odd number of items: the middle one is at index floor(n/2).
        return diffModuli[middle]

    # Even number of items: average the two items either side of the midpoint.
    return (diffModuli[middle] + diffModuli[middle - 1]) / 2.0
217 218 #---------------------------------------------------------------------------------------------------
def biweightLocation(dataList, tuningConstant):
    """Calculates the biweight location estimator (like a robust average) of a list of
    numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended.
    @rtype: float
    @return: biweight location

    @note: Returns None if MAD() of the data is zero.

    """
    centre = median(dataList)
    spread = MAD(dataList)

    if spread == 0:
        if REPORT_ERRORS == True:
            print("""ERROR: astStats: biweightLocation() : MAD() returned 0.""")
        return None

    # Numerator and denominator of equation (5) in Beers et al.; only items with
    # |u| <= 1 contribute.
    numerator = 0
    denominator = 0
    for value in dataList:
        u = (value - centre) / (tuningConstant * spread)
        if math.fabs(u) <= 1.0:
            weight = 1.0 - (u * u)
            numerator = numerator + ((value - centre) * weight * weight)
            denominator = denominator + (weight * weight)

    return centre + (numerator / denominator)
260 261 #---------------------------------------------------------------------------------------------------
def biweightScale(dataList, tuningConstant):
    """Calculates the biweight scale estimator (like a robust standard deviation) of a list
    of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 9.0 is recommended.
    @rtype: float
    @return: biweight scale

    @note: Returns None if computing the u values raises a divide by zero error.

    """
    centre = median(dataList)
    spread = MAD(dataList)

    # |x-M| values, then u values
    absDeviations = [math.fabs(value - centre) for value in dataList]
    try:
        uValues = [(value - centre) / (tuningConstant * spread) for value in dataList]
    except ZeroDivisionError:
        if REPORT_ERRORS == True:
            print("""ERROR: astStats.biweightScale() : divide by zero error.""")
        return None

    # Numerator and denominator of equation (9) in Beers et al.; only items with
    # |u| <= 1 contribute and are counted.
    numerator = 0
    denominator = 0
    used = 0
    for u, deviation in zip(uValues, absDeviations):
        if math.fabs(u) <= 1.0:
            uSquared = u * u
            u2Term = 1.0 - uSquared
            u4Term = math.pow(u2Term, 4)
            numerator = numerator + ((deviation * deviation) * u4Term)
            denominator = denominator + (u2Term * (1.0 - (5.0 * uSquared)))
            used = used + 1

    numerator = math.sqrt(numerator)
    denominator = math.fabs(denominator)

    return math.pow(float(used), 0.5) * (numerator / denominator)
311 312 #---------------------------------------------------------------------------------------------------
def biweightClipped(dataList, tuningConstant, sigmaCut):
    """Iteratively calculates biweight location and scale, using sigma clipping, for a list
    of values. The calculation is performed on the first column of a multi-dimensional
    list; other columns are ignored.

    Up to 11 iterations are performed, and iteration stops early once 5 or fewer values
    survive the clipping. The returned location and scale are those computed in the last
    iteration, i.e. before the final clip was applied.

    @type dataList: list
    @param dataList: input data
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply
    @rtype: dictionary
    @return: estimate of biweight location, scale, and list of non-clipped data, in the format
    {'biweightLocation', 'biweightScale', 'dataList'}

    @note: Returns None if an error occurs, including when dataList holds fewer than 6
    values (too few to run a single iteration).

    """
    clippedValues = []
    for row in dataList:
        if isinstance(row, list):
            clippedValues.append(row[0])
        else:
            clippedValues.append(row)

    cbi = None
    sbi = None
    clippedData = None
    iterations = 0
    while iterations < 11 and len(clippedValues) > 5:

        cbi = biweightLocation(clippedValues, tuningConstant)
        sbi = biweightScale(clippedValues, tuningConstant)

        # check for either biweight routine falling over
        # happens when feed in lots of similar numbers
        # e.g. when bootstrapping with a small sample
        if cbi is None or sbi is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : biweightClipped() :
                divide by zero error.""")
            return None

        # Clipping always restarts from the full input, not the previous survivors.
        lower = cbi - (sigmaCut * sbi)
        upper = cbi + (sigmaCut * sbi)
        clippedValues = []
        clippedData = []
        for row in dataList:
            if isinstance(row, list):
                value = row[0]
            else:
                value = row
            if value > lower and value < upper:
                clippedValues.append(value)
                clippedData.append(row)

        iterations = iterations + 1

    # The loop never ran, so there is no estimate to report.
    if clippedData is None:
        if REPORT_ERRORS:
            print("""ERROR: astStats : biweightClipped() : dataList contains < 6 items.""")
        return None

    return {'biweightLocation': cbi,
            'biweightScale': sbi,
            'dataList': clippedData}
378 379 #---------------------------------------------------------------------------------------------------
def biweightTransform(dataList, tuningConstant):
    """Calculates the biweight transform for a set of values. Useful for using as weights in
    robust line fitting.

    Each value x is given the weight (1-(x/cutoff)^2)^2 where
    cutoff = tuningConstant*|median(dataList)|, or 0.0 if |x| is not below the cutoff.
    The output is in the same order as the input, and the input list is not modified.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @rtype: list
    @return: list of [value, biweight] pairs, one per input item

    """
    # Hand median() a copy: if it sorts its argument in place, the weights built below
    # would otherwise come back in sorted order and no longer line up with the caller's
    # data.
    listMedian = abs(median(list(dataList)))
    cutoff = tuningConstant * listMedian

    biweights = []
    for item in dataList:
        if abs(item) < cutoff:
            # float() guards against integer division under Python 2.
            ratio = item / float(cutoff)
            biweights.append([item,
                              (1.0 - (ratio * ratio)) * (1.0 - (ratio * ratio))])
        else:
            biweights.append([item, 0.0])

    return biweights
408 409 #---------------------------------------------------------------------------------------------------
def OLSFit(dataList):
    """Performs an ordinary least squares fit on a two dimensional list of numbers.
    Minimum number of data points is 3.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [x, y]
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs: fewer than 3 data points, or x values that
    do not vary (the fit is then undefined).

    """
    n = float(len(dataList))
    if n <= 2:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : dataList contains < 3 items.""")
        return None

    sumX = 0
    sumY = 0
    sumXY = 0
    sumXX = 0
    for item in dataList:
        sumX = sumX + item[0]
        sumY = sumY + item[1]
        sumXY = sumXY + (item[0] * item[1])
        sumXX = sumXX + (item[0] * item[0])

    # n*sum(x^2)-(sum x)^2 is zero when every x is the same (and can come out slightly
    # negative through rounding); dividing by it or taking its square root would fail.
    denominator = (n * sumXX) - (sumX * sumX)
    if denominator <= 0.0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : divide by zero error.""")
        return None

    m = ((n * sumXY) - (sumX * sumY)) / denominator
    c = ((sumXX * sumY) - (sumX * sumXY)) / denominator

    sumRes = 0
    for item in dataList:
        residual = item[1] - (m * item[0]) - c
        sumRes = sumRes + (residual * residual)

    # Scatter about the fit, with n-2 degrees of freedom
    sigma = math.sqrt((1.0 / (n - 2)) * sumRes)

    mSigma = (sigma * math.sqrt(n)) / math.sqrt(denominator)
    cSigma = (sigma * math.sqrt(sumXX)) / math.sqrt(denominator)

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
457 458 #---------------------------------------------------------------------------------------------------
def clippedMeanStdev(dataList, sigmaCut = 3.0, maxIterations = 10.0):
    """Calculates the clipped mean and stdev of a list of numbers.

    On each iteration, points further than sigmaCut standard deviations from the current
    mean are discarded and the mean and standard deviation are recomputed from the
    survivors. Iteration stops when nothing more is clipped, when maxIterations is
    reached, or when 4 or fewer points remain. The standard deviation is numpy's default
    (population) estimate.

    @type dataList: list
    @param dataList: input data, one dimensional list of numbers
    @type sigmaCut: float
    @param sigmaCut: clipping in Gaussian sigma to apply
    @type maxIterations: int
    @param maxIterations: maximum number of iterations
    @rtype: dictionary
    @return: format {'clippedMean', 'clippedStdev', 'numPoints'}; the mean and stdev are
    those of the numPoints retained values (both are None if dataList is empty)

    """
    values = numpy.array(list(dataList))
    if values.shape[0] == 0:
        return {'clippedMean': None, 'clippedStdev': None, 'numPoints': 0}

    # Computed up front so that short lists (which never enter the loop) still get a
    # result.
    m = values.mean()
    s = values.std()

    iterations = 0
    while iterations < maxIterations and values.shape[0] > 4:

        # Keep points within sigmaCut*s of the mean; <= so that data with zero scatter
        # is retained rather than clipped away entirely.
        keep = numpy.less_equal(abs(values - m), sigmaCut * s)

        # Stop once converged (nothing clipped), or if the cut would discard everything.
        if keep.all() or not keep.any():
            break

        values = values[keep]
        m = values.mean()
        s = values.std()

        iterations = iterations + 1

    return {'clippedMean': m, 'clippedStdev': s, 'numPoints': values.shape[0]}
489 490 #---------------------------------------------------------------------------------------------------
def clippedWeightedLSFit(dataList, sigmaCut):
    """Performs a weighted least squares fit on a list of numbers with sigma clipping. Minimum number of data
    points is 5.

    Up to 11 iterations are performed; on each one the data are fitted with
    weightedLSFit(..., "errors") and the points lying sigmaCut or more of their own
    y errors away from the fitted line are discarded from the next fit. Clipping always
    restarts from the full input list.

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type sigmaCut: float
    @param sigmaCut: points with |residual|/(y error) of sigmaCut or more are clipped
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}, plus 'numDataPoints', the
    number of points that survived the final clip

    @note: Returns None if an error occurs, including when dataList holds fewer than 5
    points (too few to run a single fit).

    """
    clippedValues = list(dataList)

    fitResults = None
    iterations = 0
    while iterations < 11 and len(clippedValues) > 4:

        try:
            fitResults = weightedLSFit(clippedValues, "errors")
        except ZeroDivisionError:
            fitResults = None

        if fitResults is None or fitResults['slope'] is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : clippedWeightedLSFit() :
                divide by zero error.""")
            return None

        # Trim points more than sigmaCut*sigma away from the fitted line
        clippedValues = []
        for row in dataList:
            fit = fitResults['slope'] * row[0] + fitResults['intercept']
            res = row[1] - fit
            if abs(res) / row[2] < sigmaCut:
                clippedValues.append(row)

        iterations = iterations + 1

    # The loop never ran, so there is no fit to report.
    if fitResults is None:
        if REPORT_ERRORS:
            print("""ERROR: astStats : clippedWeightedLSFit() : dataList contains < 5 items.""")
        return None

    # store the number of values that made it through the clipping process
    fitResults['numDataPoints'] = len(clippedValues)

    return fitResults
539 540 #---------------------------------------------------------------------------------------------------
def weightedLSFit(dataList, weightType):
    """Performs a weighted least squares fit on a three dimensional list of numbers [x, y, y error].

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type weightType: string
    @param weightType: if "errors", weights are calculated assuming the input data is in the
    format [x, y, error on y]; if "weights", the weights are assumed to be already calculated and
    stored in a fourth column [x, y, error on y, weight] (as used by e.g. L{astStats.biweightLSFit})
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @raise ValueError: if weightType is neither "weights" nor "errors"

    @note: Returns None if an error occurs: fewer than 5 data points when using "weights",
    a zero y error when using "errors", or a degenerate (divide by zero) fit.

    """
    if weightType == "weights":
        n = float(len(dataList))
        if n <= 4:
            if REPORT_ERRORS:
                print("""ERROR: astStats.weightedLSFit() :
                dataList contains < 5 items.""")
            return None

        sumW = 0
        sumWX = 0
        sumWY = 0
        sumWXY = 0
        sumWXX = 0
        for item in dataList:
            W = item[3]
            sumWX = sumWX + (W * item[0])
            sumWY = sumWY + (W * item[1])
            sumWXY = sumWXY + (W * item[0] * item[1])
            sumWXX = sumWXX + (W * item[0] * item[0])
            sumW = sumW + W

        # Slope and intercept share the same denominator.
        denominator = (sumW * sumWXX) - (sumWX * sumWX)
        try:
            m = ((sumW * sumWXY) - (sumWX * sumWY)) / denominator
            c = ((sumWXX * sumWY) - (sumWX * sumWXY)) / denominator
        except ZeroDivisionError:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        sumRes = 0
        for item in dataList:
            residual = item[1] - (m * item[0]) - c
            sumRes = sumRes + (residual * residual)

        sigma = math.sqrt((1.0 / (n - 2)) * sumRes)

        # Can get div0 errors here so check
        # When biweight fitting converges this shouldn't happen
        errorDenominator = (n * sumWXX) - (sumWX * sumWX)
        if errorDenominator <= 0.0:
            if REPORT_ERRORS:
                print("""ERROR: astStats.weightedLSFit()
                : divide by zero error.""")
            return None

        mSigma = (sigma * math.sqrt(n)) / math.sqrt(errorDenominator)
        cSigma = (sigma * math.sqrt(sumWXX)) / math.sqrt(errorDenominator)

    elif weightType == "errors":
        sumX = 0
        sumY = 0
        sumXY = 0
        sumXX = 0
        sumSigma = 0
        for item in dataList:
            variance = item[2] * item[2]

            # A zero error would mean dividing by zero for every sum below.
            if variance == 0:
                if REPORT_ERRORS:
                    print("ERROR: astStats.weightedLSFit() : divide by zero error.")
                return None

            sumX = sumX + (item[0] / variance)
            sumY = sumY + (item[1] / variance)
            sumXY = sumXY + ((item[0] * item[1]) / variance)
            sumXX = sumXX + ((item[0] * item[0]) / variance)
            sumSigma = sumSigma + (1.0 / variance)

        # delta is zero when the x values do not vary (and may come out slightly
        # negative through rounding); the fit and its errors are undefined then.
        delta = (sumSigma * sumXX) - (sumX * sumX)
        if delta <= 0.0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        m = ((sumSigma * sumXY) - (sumX * sumY)) / delta
        c = ((sumXX * sumY) - (sumX * sumXY)) / delta
        mSigma = math.sqrt(sumSigma / delta)
        cSigma = math.sqrt(sumXX / delta)

    else:
        # Falling through would only fail later with an unbound variable error.
        raise ValueError("astStats.weightedLSFit() : weightType must be "
                         "\"weights\" or \"errors\"")

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
644 645 #---------------------------------------------------------------------------------------------------
def biweightLSFit(dataList, tuningConstant, sigmaCut = None):
    """Performs a weighted least squares fit, where the weights used are the biweight
    transforms of the residuals to the previous best fit .i.e. the procedure is iterative,
    and converges very quickly (iterations is set to 10 by default). Minimum number of data
    points is 10.

    The first fit is an ordinary least squares fit. The input list itself is not modified.

    This seems to give slightly different results to the equivalent R routine, so use at your
    own risk!

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y weight];
    only the x and y columns are used by the fits performed here, the third column is
    carried along unchanged
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply (set to None if not required)
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs, i.e. if any of the underlying fits fails.

    """
    dataCopy = list(dataList)

    # First perform unweighted fit, then calculate residuals
    results = OLSFit(dataCopy)
    for k in range(10):

        # A failed fit leaves nothing to iterate on.
        if results is None:
            return None

        m = results['slope']
        c = results['intercept']
        res = [(m * item[0] + c) - item[1] for item in dataCopy]

        if len(res) > 5:
            # For clipping, trim away points whose absolute residual exceeds
            # sigmaCut times the standard deviation of the absolute residuals,
            # but never go below 2 remaining points
            if sigmaCut is not None:
                absRes = [abs(item) for item in res]
                sigma = stdev(absRes)
                keptData = []
                keptRes = []
                remaining = len(dataCopy)
                for row, residual, absResidual in zip(dataCopy, res, absRes):
                    if absResidual > (sigmaCut * sigma) and remaining > 2:
                        remaining = remaining - 1
                    else:
                        keptData.append(row)
                        keptRes.append(residual)
                dataCopy = keptData
                res = keptRes

            # Biweight transform residuals. A copy is passed, and the weights are
            # looked up by residual value rather than by position, so that each data
            # row gets the weight of its own residual even if the transform returns
            # its [value, weight] pairs in a different (e.g. sorted) order.
            weights = biweightTransform(list(res), tuningConstant)
            weightOf = {}
            for pair in weights:
                weightOf[pair[0]] = pair[1]

            # Perform weighted fit, using biweight transforms
            # of residuals as weight
            wData = []
            for row, residual in zip(dataCopy, res):
                wData.append([row[0], row[1], row[2], weightOf[residual]])

            results = weightedLSFit(wData, "weights")

    return results
718 719 #---------------------------------------------------------------------------------------------------
def cumulativeBinner(data, binMin, binMax, binTotal):
    """Bins the input data cumulatively: each bin records the fraction of all items that
    are greater than that bin's lower edge.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal
    totalItems = len(data)

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin + (index * binStep)

        fraction = 0
        for value in data:
            if value > lowerEdge:
                fraction = fraction + 1.0 / totalItems

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(index + 0.5) * binStep), fraction])

    return coords
750 751 #---------------------------------------------------------------------------------------------------
def binner(data, binMin, binMax, binTotal):
    """Bins the input data: each bin counts the items that are greater than its lower edge
    and less than or equal to its upper edge.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin + (index * binStep)
        upperEdge = binMin + ((index + 1) * binStep)
        count = sum(1 for value in data if lowerEdge < value <= upperEdge)

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(index + 0.5) * binStep), count])

    return coords
782 783 #---------------------------------------------------------------------------------------------------
def weightedBinner(data, weights, binMin, binMax, binTotal):
    """Bins the input data, recorded frequency is sum of weights in bin. An item falls in
    a bin if it is greater than the bin's lower edge and less than or equal to its upper
    edge.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type weights: list
    @param weights: weight of each item in data, paired with data by position
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin + (index * binStep)
        upperEdge = binMin + ((index + 1) * binStep)

        weightTotal = 0.0
        for value, weight in zip(data, weights):
            if lowerEdge < value <= upperEdge:
                weightTotal = weightTotal + weight

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(index + 0.5) * binStep), weightTotal])

    return coords
814 815 #--------------------------------------------------------------------------------------------------- 816