Changeset 1000

Show
Ignore:
Timestamp:
07/12/12 22:01:22 (10 months ago)
Author:
jjr8
Message:

Continued to overhaul the predictive modeling tools.

Location:
MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/FitGAMForDataframe.r

    r973 r1000  
    135135        if (writeTermPlots && hasTerms) 
    136136        { 
    137             message("Plotting model terms...") 
     137            message("\nPlotting model terms...") 
    138138             
    139139            # If the caller requested that terms be plotted on a 
  • MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/FitTreeModelForDataframe.r

    r973 r1000  
    117117                              paste(capture.output(print(tree)), sep="", collapse="\n"), "\n", 
    118118                              sep="") 
     119                               
     120        if (tree$method != "class") 
     121            modelSummary <- paste(modelSummary, 
     122                                  "\n", 
     123                                  sprintf("Deviance explained = %.1f%%  (calculated as (1 - R^2) * 100)\n", (1 - tree$cptable[nrow(tree$cptable),"rel error"]) * 100), 
     124                                  sep="") 
    119125         
    120126        if (!is.null(pruningMethod)) 
     127        { 
    121128            modelSummary <- paste(modelSummary, 
    122129                                  "\n", 
     
    128135                                  sep="") 
    129136 
     137            if (tree$method != "class") 
     138                modelSummary <- paste(modelSummary, 
     139                                      "\n", 
     140                                      sprintf("Deviance explained = %.1f%%  (calculated as (1 - R^2) * 100)\n", (1 - prunedTree$cptable[nrow(prunedTree$cptable),"rel error"]) * 100), 
     141                                      sep="") 
     142        } 
     143 
    130144        writeLines(modelSummary) 
    131145        message("") 
  • MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/Modeling.py

    r998 r1000  
    118118 
    119119    @classmethod 
     120    def PredictFromArcGISTable(cls, inputModelFile, inputTable, actualValuesField=None, predictedValuesField=None, cutoff=None, 
     121                               where=None, ignoreOutOfRangeValues=True, noDataValue=None, 
     122                               outputPlotFile=None, measure1=u'tpr', measure2=u'fpr', colorize=True,  
     123                               res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white', 
     124                               overwriteExisting=False): 
     125        cls.__doc__.Obj.ValidateMethodInvocation() 
     126 
     127    @classmethod 
    120128    def PredictFromArcGISRasters(cls, inputModelFile, outputResponseRaster, templateRaster=None, rasterPredictorNames=None, predictorRasters=None, constantPredictorNames=None, constantPredictorValues=None, cutoff=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, outputErrorRaster=None, buildPyramids=False, overwriteExisting=False): 
    121129        cls.__doc__.Obj.ValidateMethodInvocation() 
     
    226234        finally: 
    227235            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName)) 
     236 
     237    @classmethod 
     238    def PredictFromArcGISTable(cls, inputModelFile, inputTable, actualValuesField=None, predictedValuesField=None, cutoff=None, 
     239                               where=None, ignoreOutOfRangeValues=True, noDataValue=None, 
     240                               outputPlotFile=None, measure1=u'tpr', measure2=u'fpr', colorize=True,  
     241                               res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white', 
     242                               overwriteExisting=False): 
     243        cls.__doc__.Obj.ValidateMethodInvocation() 
    228244 
    229245    @classmethod 
     
    432448 
    433449    @classmethod 
     450    def PredictFromArcGISTable(cls, inputModelFile, inputTable, actualValuesField=None, predictedValuesField=None, cutoff=None, 
     451                               where=None, ignoreOutOfRangeValues=True, noDataValue=None, 
     452                               outputPlotFile=None, measure1=u'tpr', measure2=u'fpr', colorize=True,  
     453                               res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white', 
     454                               overwriteExisting=False): 
     455        cls.__doc__.Obj.ValidateMethodInvocation() 
     456 
     457    @classmethod 
    434458    def PredictFromArcGISRasters(cls, inputModelFile, outputResponseRaster, templateRaster=None, rasterPredictorNames=None, predictorRasters=None, constantPredictorNames=None, constantPredictorValues=None, cutoff=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, buildPyramids=False, overwriteExisting=False): 
    435459        cls.__doc__.Obj.ValidateMethodInvocation() 
     
    643667        finally: 
    644668            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName)) 
     669 
     670    @classmethod 
     671    def PredictFromArcGISTable(cls, inputModelFile, inputTable, actualValuesField=None, predictedValuesField=None, cutoff=None, 
     672                               where=None, ignoreOutOfRangeValues=True, noDataValue=None, 
     673                               outputPlotFile=None, measure1=u'tpr', measure2=u'fpr', colorize=True,  
     674                               res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white', 
     675                               overwriteExisting=False): 
     676        cls.__doc__.Obj.ValidateMethodInvocation() 
    645677 
    646678    @classmethod 
     
    12071239    return dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam 
    12081240 
    1209 def _PredictFromArcGISRasters(modelType, fitToolName, inputModelFile, outputResponseRaster, cutoff, constantPredictorNames, constantPredictorValues, rasterPredictorNames, predictorRasters, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, buildPyramids, overwriteExisting): 
    1210  
    1211     # Perform additional parameter validation. 
    1212  
    1213     if constantPredictorNames is None and constantPredictorValues is not None or constantPredictorNames is not None and constantPredictorValues is None: 
    1214         Logger.RaiseException(ValueError(_(u'The lists of constant predictor variables names and values must both be specified, or neither must be specified.'))) 
    1215  
    1216     if rasterPredictorNames is None and predictorRasters is not None or rasterPredictorNames is not None and predictorRasters is None: 
    1217         Logger.RaiseException(ValueError(_(u'The lists of raster predictor variable names and rasters must both be specified, or neither must be specified.'))) 
    1218  
    1219     if constantPredictorNames is not None and len(set(constantPredictorNames)) != len(constantPredictorNames): 
    1220         Logger.RaiseException(ValueError(_(u'The list of constant predictors must not contain duplicates.'))) 
    1221  
    1222     if rasterPredictorNames is not None and len(set(rasterPredictorNames)) != len(rasterPredictorNames): 
    1223         Logger.RaiseException(ValueError(_(u'The list of raster predictors must not contain duplicates.'))) 
    1224  
    1225     if constantPredictorNames is not None and rasterPredictorNames is not None and len(set(constantPredictorNames).intersection(set(rasterPredictorNames))) > 0: 
    1226         Logger.RaiseException(ValueError(_(u'The same predictor must not appear in both the list of constant predictors and the list of raster predictors.'))) 
    1227          
    1228     gp = GeoprocessorManager.GetWrappedGeoprocessor() 
    1229  
    1230     if predictorRasters is not None: 
    1231         for raster in predictorRasters: 
    1232             d = gp.Describe(raster) 
    1233             if d.SpatialReference.Name is None or d.SpatialReference.Name.lower() == u'unknown': 
    1234                 Logger.RaiseException(ValueError(_(u'The predictor raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': raster})) 
    1235  
    1236         if outputResponseRaster.lower() in map(lambda s: unicode(s).lower(), predictorRasters): 
    1237             Logger.RaiseException(ValueError(_(u'The output response raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output response raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster})) 
    1238  
    1239         if outputErrorRaster is not None and outputErrorRaster.lower() in map(lambda s: unicode(s).lower(), predictorRasters): 
    1240             Logger.RaiseException(ValueError(_(u'The output standard error raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output standard error raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster})) 
    1241  
    1242     # Look up the coordinate system, cell size, and extent of the 
    1243     # template raster. 
    1244  
    1245     if templateRaster is None: 
    1246         if predictorRasters is None: 
    1247             Logger.RaiseException(ValueError(_(u'If no predictor rasters are provided, a template raster must be specified.'))) 
    1248         templateRaster = predictorRasters[0] 
    1249  
    1250     templateDescribe = gp.Describe(templateRaster) 
    1251     if templateDescribe.SpatialReference.Name is None or templateDescribe.SpatialReference.Name.lower() == u'unknown': 
    1252         Logger.RaiseException(ValueError(_(u'The template raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': templateRaster})) 
    1253  
    1254     coordinateSystem = gp.CreateSpatialReference(templateDescribe.SpatialReference).split(';')[0] 
    1255     cellSize = templateDescribe.MeanCellWidth 
    1256     extent = templateDescribe.Extent 
    1257  
     1241def _PredictFromArcGISTable(modelType, fitToolName, inputModelFile, inputTable, actualValuesField, predictedValuesField, cutoff, where, ignoreOutOfRangeValues, outputROCPlotFile, plotFileFormat, res, width, height, pointSize, bg, overwriteExisting): 
     1242 
     1243    # If the caller requested emf format for the plot file, 
     1244    # convert the width and height from thousands of an inch to 
     1245    # inches. 
     1246 
     1247    if outputPlotFile is not None and outputPlotFile.lower().endswith(u'.emf'): 
     1248        width = width / 1000 
     1249        height = height / 1000 
     1250     
    12581251    # Load the fitted model. 
    12591252 
     
    12771270            dependency = RPackageDependency(unicode(r['rPackage'])) 
    12781271            dependency.Initialize() 
    1279  
    1280         # If it is a GAM fitted by the R gam package, report an error 
    1281         # if the caller specified an outputErrorRaster, because that 
    1282         # package's predict.gam function cannot generate predictions 
    1283         # from the newdata parameter. 
    1284  
    1285         if outputErrorRaster is not None and modelType.lower() == 'gam' and r['rPackage'] == 'gam': 
    1286             Logger.RaiseException(ValueError(_('The input model from file %(file)s is a GAM that was fitted using the R gam package. Because the gam package cannot return standard errors for responses predicted from data other than those used to fit the model, a standard error raster cannot be produced. Please do not specify a standard error raster, or, if you must have one, re-fit the GAM using the R mgcv package.') % {u'file': inputModelFile})) 
    12871272 
    12881273        # If it is a GAM fitted by the R gam package, we have to work 
     
    13021287            r('fam20985982305 <<- model$family') 
    13031288 
    1304         # Log a summery of the model, to remind the user what it is. 
    1305  
    1306         if r('exists("rPackage")') and r['rPackage'] not in ('rpart', 'randomForest', 'party'): 
    1307             R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'SummarizeModel.r'), False) 
    1308  
    1309             r('writeLines("")') 
    1310             r('writeLines("MODEL SUMMARY:")') 
    1311             r('writeLines("==============")') 
    1312             r('writeLines(SummarizeModel(model))') 
    1313             r('writeLines("")') 
    1314  
    1315         # If the caller provided a GLM or GAM without any 
    1316         # predictor variables, then the predicted response and 
    1317         # standard errors will be constant values. Report a warning 
    1318         # and create constant rasters having these values. 
    1319  
    1320         if (not r('exists("rPackage")') or r['rPackage'] in ['glm', 'gam', 'mgcv']) and isinstance(r('all.vars(model$formula)'), basestring):        # If only one term is in the model (i.e. the response variable), rpy will return it as a string, rather than a list with one string in it. 
    1321             if outputErrorRaster is not None: 
    1322                 Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response and standard error rasters will contain constant values.')) 
    1323             else: 
    1324                 Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response raster will contain a constant value.')) 
    1325  
    1326             responseValue = r('predict(model, newdata=data.frame(0), type="response")') 
    1327             gp.CreateRandomRaster_management(os.path.dirname(outputResponseRaster), os.path.basename(outputResponseRaster), u'UNIFORM %g %g' % (responseValue, responseValue), extent, cellSize) 
    1328             gp.DefineProjection_management(outputResponseRaster, coordinateSystem) 
    1329             if buildPyramids: 
    1330                 gp.BuildPyramids_Management(outputResponseRaster) 
    1331  
    1332             if outputErrorRaster is not None: 
    1333                 errorValue = r('predict(model, newdata=data.frame(0), type="response", se.fit=TRUE)$se.fit') 
    1334                 gp.CreateRandomRaster_management(os.path.dirname(outputErrorRaster), os.path.basename(outputErrorRaster), u'UNIFORM %g %g' % (errorValue, errorValue), extent, cellSize) 
    1335                 gp.DefineProjection_management(outputErrorRaster, coordinateSystem) 
    1336                 if buildPyramids: 
    1337                     gp.BuildPyramids_Management(outputResponseRaster) 
    1338  
    1339         # Otherwise, the model contains at least one predictor 
    1340         # variable, do the prediction. 
    1341  
    1342         else: 
    1343             tempDir = TemporaryDirectory() 
    1344             
    1345             # First prepare the input rasters for the prediction. 
    1346  
    1347             _PreparePredictorRasters(r, gp, tempDir, constantPredictorNames, rasterPredictorNames, predictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques) 
    1348  
    1349             # Do the prediction. This creates files in GDAL-compatible 
    1350             # .bil format. 
    1351             # 
    1352             # Force the logging system to log R Info messages as 
    1353             # Debug so that the user is not spammed with "Closing 
    1354             # GDAL dataset handle" messages. There appears to be 
    1355             # no way to suppress these messages from within R. 
    1356  
    1357             Logger.Info(_(u'Predicting...')) 
    1358  
    1359             R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'Utils.r'), False) 
    1360             R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'PredictLMForArcGISRasters.r'), False) 
    1361  
    1362             oldLoggingLevel = r.LogInfoAsDebug 
    1363             r.LogInfoAsDebug = True 
    1364             try: 
    1365                 if r('exists("rPackage")') and r['rPackage'] in ['randomForest', 'party']: 
    1366                     trainingData = 'data' 
    1367                 else: 
    1368                     trainingData = 'model$data' 
    1369  
    1370                 if r('exists("rPackage")'): 
    1371                     rPackage = '"' + r['rPackage'] + '"' 
    1372                 else: 
    1373                     rPackage = 'NULL' 
    1374  
    1375                 if constantPredictorNames is not None and constantPredictorValues is not None: 
    1376                     for i in range(len(constantPredictorValues)): 
    1377                         try: 
    1378                             constantPredictorValues[i] = int(constantPredictorValues[i]) 
    1379                         except: 
    1380                             try: 
    1381                                 constantPredictorValues[i] = float(constantPredictorValues[i]) 
    1382                             except: 
    1383                                 pass 
    1384                     r['constantsForPredictors'] = dict(zip(constantPredictorNames, constantPredictorValues)) 
    1385                 else: 
    1386                     r('constantsForPredictors <- NULL') 
    1387  
    1388                 if cutoff is not None: 
    1389                     cutoff = repr(cutoff) 
    1390                 else: 
    1391                     cutoff = 'NULL' 
    1392  
    1393                 #constantPredictorNames, constantPredictorValues 
    1394  
    1395                 tempResponseFile = os.path.join(tempDir.Path, u'temp_response') 
    1396                  
    1397                 if outputErrorRaster is None: 
    1398                     r('PredictLMForArcGISRasters(model, %s, %s, rastersForPredictors, constantsForPredictors, "%s", ignoreOutOfRangeValues=%s, cutoff=%s)' % (trainingData, rPackage, tempResponseFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), cutoff))      # TODO: Fix this to account for cutoff and constant values 
    1399                 else: 
    1400                     tempErrorFile = os.path.join(tempDir.Path, u'temp_standarderror') 
    1401                     r('PredictLMForArcGISRasters(model, %s, %s, rastersForPredictors, constantsForPredictors, "%s", "%s", ignoreOutOfRangeValues=%s, cutoff=%s)' % (trainingData, rPackage, tempResponseFile.replace('\\', '\\\\'), tempErrorFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), cutoff)) 
    1402             finally: 
    1403                 r.LogInfoAsDebug = oldLoggingLevel 
    1404  
    1405             # Copy the .bil files to the destination rasters. 
    1406  
    1407             Logger.Info(_(u'Creating outputs...')) 
    1408  
    1409             from GeoEco.Datasets import Dataset 
    1410             from GeoEco.Datasets.GDAL import GDALDataset 
    1411             from GeoEco.Datasets.ArcGIS import ArcGISWorkspace, ArcGISRaster 
    1412  
    1413             bilFile = GDALDataset(tempResponseFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')}) 
    1414             workspace = ArcGISWorkspace(os.path.dirname(outputResponseRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputResponseRaster)]) 
    1415             try: 
    1416                 workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildRAT=True, buildPyramids=buildPyramids) 
    1417             finally: 
    1418                 del bilFile 
    1419                 del workspace 
    1420  
    1421             if outputErrorRaster is not None: 
    1422                 bilFile = GDALDataset(tempErrorFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')}) 
    1423                 workspace = ArcGISWorkspace(os.path.dirname(outputErrorRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputErrorRaster)]) 
    1424                 try: 
    1425                     workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids) 
    1426                 finally: 
    1427                     del bilFile 
    1428                     del workspace 
    1429  
    14301289    # Delete R variables assigned by this function. 
    14311290     
     
    14441303        r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())') 
    14451304 
     1305def _PredictFromArcGISRasters(modelType, fitToolName, inputModelFile, outputResponseRaster, cutoff, constantPredictorNames, constantPredictorValues, rasterPredictorNames, predictorRasters, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, buildPyramids, overwriteExisting): 
     1306 
     1307    # Perform additional parameter validation. 
     1308 
     1309    if constantPredictorNames is None and constantPredictorValues is not None or constantPredictorNames is not None and constantPredictorValues is None: 
     1310        Logger.RaiseException(ValueError(_(u'The lists of constant predictor variables names and values must both be specified, or neither must be specified.'))) 
     1311 
     1312    if rasterPredictorNames is None and predictorRasters is not None or rasterPredictorNames is not None and predictorRasters is None: 
     1313        Logger.RaiseException(ValueError(_(u'The lists of raster predictor variable names and rasters must both be specified, or neither must be specified.'))) 
     1314 
     1315    if constantPredictorNames is not None and len(set(constantPredictorNames)) != len(constantPredictorNames): 
     1316        Logger.RaiseException(ValueError(_(u'The list of constant predictors must not contain duplicates.'))) 
     1317 
     1318    if rasterPredictorNames is not None and len(set(rasterPredictorNames)) != len(rasterPredictorNames): 
     1319        Logger.RaiseException(ValueError(_(u'The list of raster predictors must not contain duplicates.'))) 
     1320 
     1321    if constantPredictorNames is not None and rasterPredictorNames is not None and len(set(constantPredictorNames).intersection(set(rasterPredictorNames))) > 0: 
     1322        Logger.RaiseException(ValueError(_(u'The same predictor must not appear in both the list of constant predictors and the list of raster predictors.'))) 
     1323         
     1324    gp = GeoprocessorManager.GetWrappedGeoprocessor() 
     1325 
     1326    if predictorRasters is not None: 
     1327        for raster in predictorRasters: 
     1328            d = gp.Describe(raster) 
     1329            if d.SpatialReference.Name is None or d.SpatialReference.Name.lower() == u'unknown': 
     1330                Logger.RaiseException(ValueError(_(u'The predictor raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': raster})) 
     1331 
     1332        if outputResponseRaster.lower() in map(lambda s: unicode(s).lower(), predictorRasters): 
     1333            Logger.RaiseException(ValueError(_(u'The output response raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output response raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster})) 
     1334 
     1335        if outputErrorRaster is not None and outputErrorRaster.lower() in map(lambda s: unicode(s).lower(), predictorRasters): 
     1336            Logger.RaiseException(ValueError(_(u'The output standard error raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output standard error raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster})) 
     1337 
     1338    # Look up the coordinate system, cell size, and extent of the 
     1339    # template raster. 
     1340 
     1341    if templateRaster is None: 
     1342        if predictorRasters is None: 
     1343            Logger.RaiseException(ValueError(_(u'If no predictor rasters are provided, a template raster must be specified.'))) 
     1344        templateRaster = predictorRasters[0] 
     1345 
     1346    templateDescribe = gp.Describe(templateRaster) 
     1347    if templateDescribe.SpatialReference.Name is None or templateDescribe.SpatialReference.Name.lower() == u'unknown': 
     1348        Logger.RaiseException(ValueError(_(u'The template raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': templateRaster})) 
     1349 
     1350    coordinateSystem = gp.CreateSpatialReference(templateDescribe.SpatialReference).split(';')[0] 
     1351    cellSize = templateDescribe.MeanCellWidth 
     1352    extent = templateDescribe.Extent 
     1353 
     1354    # Load the fitted model. 
     1355 
     1356    r = R.GetInterpreter() 
     1357 
     1358    r('load("%s")' % inputModelFile.replace('\\', '\\\\')) 
     1359    try: 
     1360        if not r('exists("model")'): 
     1361            Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the Fit %(type)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'fitToolName': fitToolName})) 
     1362        if modelType.lower() not in r('tolower(class(model))'): 
     1363            Logger.RaiseException(ValueError(_('The input model file %(file)s contains a variable called "model" but it is not a %(type)s, indicating that it was not generated by the %(fitToolName)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'type': modelType, u'fitToolName': fitToolName})) 
     1364 
     1365        if r('exists("rPackage")'): 
     1366            Logger.Info(_(u'Loaded %(type)s from %(file)s. The model was fitted with the R %(pkg)s package.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, u'pkg': r['rPackage']}) 
     1367        else: 
     1368            Logger.Info(_(u'Loaded %(type)s from %(file)s.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, }) 
     1369 
     1370        # If the model required an R package, load it. 
     1371 
     1372        if r('exists("rPackage")'): 
     1373            dependency = RPackageDependency(unicode(r['rPackage'])) 
     1374            dependency.Initialize() 
     1375 
     1376        # If it is a GAM fitted by the R gam package, report an error 
     1377        # if the caller specified an outputErrorRaster, because that 
     1378        # package's predict.gam function cannot generate predictions 
     1379        # from the newdata parameter. 
     1380 
     1381        if outputErrorRaster is not None and modelType.lower() == 'gam' and r['rPackage'] == 'gam': 
     1382            Logger.RaiseException(ValueError(_('The input model from file %(file)s is a GAM that was fitted using the R gam package. Because the gam package cannot return standard errors for responses predicted from data other than those used to fit the model, a standard error raster cannot be produced. Please do not specify a standard error raster, or, if you must have one, re-fit the GAM using the R mgcv package.') % {u'file': inputModelFile})) 
     1383 
     1384        # If it is a GAM fitted by the R gam package, we have to work 
     1385        # around a bug in predict.gam. The situation is described in 
     1386        # more detail in a message to the R help mailing list titled 
     1387        # "use of step.gam (from package 'gam') and superassignment 
     1388        # inside functions" dated 28-Feb-2008 (see 
     1389        # https://stat.ethz.ch/pipermail/r-help/2008-February/155586.html). 
     1390        # Define uniquely-named global variables for the formula, data 
     1391        # and family parameters. This is very hacky but it is the only 
     1392        # way I could get it working. Note the use of the <<- 
     1393        # operator. Hopefully these names are unique. 
     1394         
     1395        if modelType.lower() == 'gam' and r['rPackage'] == 'gam': 
     1396            r('f20985982305 <<- model$formula') 
     1397            r('data20985982305 <<- model$data') 
     1398            r('fam20985982305 <<- model$family') 
     1399 
     1400        # If the caller provided a GLM or GAM without any 
     1401        # predictor variables, then the predicted response and 
     1402        # standard errors will be constant values. Report a warning 
     1403        # and create constant rasters having these values. 
     1404 
     1405        if (not r('exists("rPackage")') or r['rPackage'] in ['glm', 'gam', 'mgcv']) and isinstance(r('all.vars(model$formula)'), basestring):        # If only one term is in the model (i.e. the response variable), rpy will return it as a string, rather than a list with one string in it. 
     1406            if outputErrorRaster is not None: 
     1407                Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response and standard error rasters will contain constant values.')) 
     1408            else: 
     1409                Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response raster will contain a constant value.')) 
     1410 
     1411            responseValue = r('predict(model, newdata=data.frame(0), type="response")') 
     1412            gp.CreateRandomRaster_management(os.path.dirname(outputResponseRaster), os.path.basename(outputResponseRaster), u'UNIFORM %g %g' % (responseValue, responseValue), extent, cellSize) 
     1413            gp.DefineProjection_management(outputResponseRaster, coordinateSystem) 
     1414            if buildPyramids: 
     1415                gp.BuildPyramids_Management(outputResponseRaster) 
     1416 
     1417            if outputErrorRaster is not None: 
     1418                errorValue = r('predict(model, newdata=data.frame(0), type="response", se.fit=TRUE)$se.fit') 
     1419                gp.CreateRandomRaster_management(os.path.dirname(outputErrorRaster), os.path.basename(outputErrorRaster), u'UNIFORM %g %g' % (errorValue, errorValue), extent, cellSize) 
     1420                gp.DefineProjection_management(outputErrorRaster, coordinateSystem) 
     1421                if buildPyramids: 
     1422                    gp.BuildPyramids_Management(outputResponseRaster) 
     1423 
     1424        # Otherwise, the model contains at least one predictor 
     1425        # variable, do the prediction. 
     1426 
     1427        else: 
     1428            tempDir = TemporaryDirectory() 
     1429            
     1430            # First prepare the input rasters for the prediction. 
     1431 
     1432            _PreparePredictorRasters(r, gp, tempDir, constantPredictorNames, rasterPredictorNames, predictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques) 
     1433 
     1434            # Do the prediction. This creates files in GDAL-compatible 
     1435            # .bil format. 
     1436            # 
     1437            # Force the logging system to log R Info messages as 
     1438            # Debug so that the user is not spammed with "Closing 
     1439            # GDAL dataset handle" messages. There appears to be 
     1440            # no way to suppress these messages from within R. 
     1441 
     1442            Logger.Info(_(u'Predicting...')) 
     1443 
     1444            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'Utils.r'), False) 
     1445            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'PredictLMForArcGISRasters.r'), False) 
     1446 
     1447            oldLoggingLevel = r.LogInfoAsDebug 
     1448            r.LogInfoAsDebug = True 
     1449            try: 
     1450                if r('exists("rPackage")') and r['rPackage'] in ['randomForest', 'party']: 
     1451                    trainingData = 'data' 
     1452                else: 
     1453                    trainingData = 'model$data' 
     1454 
     1455                if r('exists("rPackage")'): 
     1456                    rPackage = '"' + r['rPackage'] + '"' 
     1457                else: 
     1458                    rPackage = 'NULL' 
     1459 
     1460                if constantPredictorNames is not None and constantPredictorValues is not None: 
     1461                    for i in range(len(constantPredictorValues)): 
     1462                        try: 
     1463                            constantPredictorValues[i] = int(constantPredictorValues[i]) 
     1464                        except: 
     1465                            try: 
     1466                                constantPredictorValues[i] = float(constantPredictorValues[i]) 
     1467                            except: 
     1468                                pass 
     1469                    r['constantsForPredictors'] = dict(zip(constantPredictorNames, constantPredictorValues)) 
     1470                else: 
     1471                    r('constantsForPredictors <- NULL') 
     1472 
     1473                if cutoff is not None: 
     1474                    cutoff = repr(cutoff) 
     1475                else: 
     1476                    cutoff = 'NULL' 
     1477 
     1478                #constantPredictorNames, constantPredictorValues 
     1479 
     1480                tempResponseFile = os.path.join(tempDir.Path, u'temp_response') 
     1481                 
     1482                if outputErrorRaster is None: 
     1483                    r('PredictLMForArcGISRasters(model, %s, %s, rastersForPredictors, constantsForPredictors, "%s", ignoreOutOfRangeValues=%s, cutoff=%s)' % (trainingData, rPackage, tempResponseFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), cutoff))      # TODO: Fix this to account for cutoff and constant values 
     1484                else: 
     1485                    tempErrorFile = os.path.join(tempDir.Path, u'temp_standarderror') 
     1486                    r('PredictLMForArcGISRasters(model, %s, %s, rastersForPredictors, constantsForPredictors, "%s", "%s", ignoreOutOfRangeValues=%s, cutoff=%s)' % (trainingData, rPackage, tempResponseFile.replace('\\', '\\\\'), tempErrorFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), cutoff)) 
     1487            finally: 
     1488                r.LogInfoAsDebug = oldLoggingLevel 
     1489 
     1490            # Copy the .bil files to the destination rasters. 
     1491 
     1492            Logger.Info(_(u'Creating outputs...')) 
     1493 
     1494            from GeoEco.Datasets import Dataset 
     1495            from GeoEco.Datasets.GDAL import GDALDataset 
     1496            from GeoEco.Datasets.ArcGIS import ArcGISWorkspace, ArcGISRaster 
     1497 
     1498            bilFile = GDALDataset(tempResponseFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')}) 
     1499            workspace = ArcGISWorkspace(os.path.dirname(outputResponseRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputResponseRaster)]) 
     1500            try: 
     1501                workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildRAT=True, buildPyramids=buildPyramids) 
     1502            finally: 
     1503                del bilFile 
     1504                del workspace 
     1505 
     1506            if outputErrorRaster is not None: 
     1507                bilFile = GDALDataset(tempErrorFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')}) 
     1508                workspace = ArcGISWorkspace(os.path.dirname(outputErrorRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputErrorRaster)]) 
     1509                try: 
     1510                    workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids) 
     1511                finally: 
     1512                    del bilFile 
     1513                    del workspace 
     1514 
     1515    # Delete R variables assigned by this function. 
     1516     
     1517    finally: 
     1518        r('if (exists("rastersForPredictors")) rm("rastersForPredictors")') 
     1519        r('if (exists("model")) rm("model")') 
     1520        r('if (exists("data")) rm("data")') 
     1521        r('if (exists("rPackage")) rm("rPackage")') 
     1522        r('if (exists("xVar")) rm("xVar")') 
     1523        r('if (exists("yVar")) rm("yVar")') 
     1524        r('if (exists("zVar")) rm("zVar")') 
     1525        r('if (exists("mVar")) rm("mVar")') 
     1526        r('if (exists("coordinateSystem")) rm("coordinateSystem")') 
     1527        r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())') 
     1528        r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())') 
     1529        r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())') 
     1530 
    14461531def _PreparePredictorRasters(r, gp, tempDir, constantPredictorNames, rasterPredictorNames, predictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques): 
    14471532 
     
    21202205    initializeToArcGISGeoprocessorVariable=u'OverwriteOutput') 
    21212206 
     2207# Public method: GLM.PredictFromArcGISTable 
     2208 
     2209AddMethodMetadata(GLM.PredictFromArcGISTable, 
     2210    shortDescription=_(u'Given a fitted generalized linear model (GLM) and a table containing fields for the predictor variables, this tool predicts the response variable for each row of the table.'), 
     2211    isExposedToPythonCallers=True, 
     2212    isExposedByCOM=True, 
     2213    isExposedAsArcGISTool=True, 
     2214    arcGISDisplayName=_(u'Predict GLM From Table'), 
     2215    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Linear Models'), 
     2216    dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'caret')]) 
     2217 
     2218CopyArgumentMetadata(GLM.FitToArcGISTable, u'cls', GLM.PredictFromArcGISTable, u'cls') 
     2219 
     2220AddArgumentMetadata(GLM.PredictFromArcGISTable, u'inputModelFile', 
     2221    typeMetadata=FileTypeMetadata(mustExist=True), 
     2222    description=_( 
     2223u"""File that contains the fitted model, generated by the Fit GLM 
     2224tool."""), 
     2225    arcGISDisplayName=_(u'Input model file')) 
     2226 
     2227AddArgumentMetadata(GLM.PredictFromArcGISTable, u'inputTable', 
     2228    typeMetadata=ArcGISTableViewTypeMetadata(mustExist=True), 
     2229    description=_( 
     2230u"""ArcGIS table, table view, feature class, or feature layer 
     2231containing the data for which the prediction should be done. 
     2232 
     2233The table must have a field for each predictor variable in the model. 
     2234The field names must be spelled exactly like the predictor variables. 
     2235 
     2236The table may optionally have a field containing the actual (observed) 
     2237values of the response variable. If this field is provided, the tool 
     2238will calculate model performance statistics by comparing the actual 
     2239values to the predicted values. 
     2240 
     2241The table may also optionally have a field that should recieve the 
     2242predicted values."""), 
     2243    arcGISDisplayName=_(u'Input table')) 
     2244 
     2245AddArgumentMetadata(GLM.PredictFromArcGISTable, u'actualValuesField', 
     2246    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True), 
     2247    description=_( 
     2248u"""Field containing the actual (observed) values of the response 
     2249variable. 
     2250 
     2251This parameter is optional. If it is is provided, the tool will 
     2252calculate model performance statistics by comparing the actual values 
     2253obtained from this field to the predicted values produced by the 
     2254model."""), 
     2255    arcGISDisplayName=_(u'Field containing the actual values of the response variable'), 
     2256    arcGISParameterDependencies=[u'table']) 
     2257 
     2258AddArgumentMetadata(GLM.PredictFromArcGISTable, u'predictedValuesField', 
     2259    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'actualValuesField']), 
     2260    description=_( 
     2261u"""Field to receive the predicted values produced by the model. 
     2262 
     2263This parameter is optional. It allows you to record the predicted 
     2264values so you perform follow-up analysis."""), 
     2265    arcGISDisplayName=_(u'Field to receive the predicted values of the response variable'), 
     2266    arcGISParameterDependencies=[u'table']) 
     2267 
     2268AddArgumentMetadata(GLM.PredictFromArcGISTable, u'cutoff', 
     2269    typeMetadata=FloatTypeMetadata(minValue=0., maxValue=1., canBeNone=True), 
     2270    description=_( 
     2271u"""Cutoff to use when classifying the continuous probability output 
     2272by a binomial model into a binary result (0 or 1). Probabilities 
     2273greater than or equal to the cutoff are classified as 1; probabilities 
     2274less than the cutoff are classified as 0. 
     2275 
     2276This parameter should not be used for models that are not intended to 
     2277perform binary classification. 
     2278 
     2279For binomial models, if a value is not provided, the tool will select 
     2280one automatically by iterating through the full range of cutoff values 
     2281(0 to 1) and finding the value that maximizes the value of the Youden 
     2282index (see Perkins and Schisterman, 2006) and thereby attempts to 
     2283minimize the misclassification rate of the model. This approach may 
     2284not be optimal for your application; we encourage you to review the 
     2285extensive literature around cutoffs and select a value deliberately. 
     2286 
     2287References 
     2288 
     2289Perkins NJ, Schisterman EF (2006) The Inconsistency of "Optimal" 
     2290Cutpoints Obtained using Two Criteria based on the Receiver Operating 
     2291Characteristic Curve. American Journal of Epidemiology 163: 
     2292670-675."""), 
     2293    arcGISDisplayName=_(u'Binary classification cutoff')) 
     2294 
     2295AddArgumentMetadata(GLM.PredictFromArcGISTable, u'where', 
     2296    typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True), 
     2297    description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description, 
     2298    arcGISParameterDependencies=[u'inputTable'], 
     2299    arcGISDisplayName=_(u'Where clause'), 
     2300    arcGISCategory=_(u'Prediction options')) 
     2301 
     2302AddArgumentMetadata(GLM.PredictFromArcGISTable, u'ignoreOutOfRangeValues', 
     2303    typeMetadata=BooleanTypeMetadata(), 
     2304    description=_( 
     2305u"""If True, predictions will not be made where predictor values fall 
     2306outside of the range of values used to fit the model. 
     2307 
     2308If False, predictions will be attempted regardless of what the 
     2309predictor values are. 
     2310 
     2311This parameter is set to True by default because many believe that it 
     2312is a bad practice to extrapolate a model beyond the range of values 
     2313used to fit it. But if your model provides a very strong fit, or you 
     2314have some other reason to believe it is very robust, you can set this 
     2315parameter to False to perform the extrapolation."""), 
     2316    arcGISDisplayName=_(u'Ignore raster values outside the modeled range'), 
     2317    arcGISCategory=_(u'Prediction options')) 
     2318 
     2319AddArgumentMetadata(GLM.PredictFromArcGISTable, u'noDataValue', 
     2320    typeMetadata=FloatTypeMetadata(canBeNone=True), 
     2321    description=_( 
     2322u"""Value to use when a prediction cannot be made (because, for 
     2323example, when predictor values are missing). 
     2324 
     2325If a value is not provided for this parameter, a database NULL value 
     2326will be stored in the field when a prediction cannot be made. If the 
     2327field cannot store NULL values, as is the case with shapefiles, the 
     2328value -9999 will be used."""), 
     2329    arcGISDisplayName=_(u'Value to use when a prediction cannot be made'), 
     2330    arcGISCategory=_(u'Prediction options')) 
     2331 
     2332AddArgumentMetadata(GLM.PredictFromArcGISTable, u'outputPlotFile', 
     2333    typeMetadata=FileTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile', u'inputTable'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True), 
     2334    description=_( 
     2335u"""Diagnostic plot file to create for binary classification models. 
     2336 
     2337By default, the plot will show a receiver operating characteristic 
     2338(ROC) curve for the predictions, but you can specify different 
     2339performance measures if desired. The ROC curve plots the true positive 
     2340rate against the false positive rate. 
     2341 
     2342The file must have one of the following two extensions, which 
     2343determines the format that will be used: 
     2344 
     2345* .emf - Windows enhanced metafile (EMF) format. This is a vector 
     2346  format that may be printed and resized without any pixelation and is 
     2347  therefore suitable for use in printable documents that recognize 
     2348  this format (e.g. Microsoft Word or Microsoft Visio). 
     2349 
     2350* .png - Portable network graphics (PNG) format. This is a compressed, 
     2351  lossless, highly portable raster format suitable for use in web 
     2352  pages or other locations where a raster format is desired. Most 
     2353  scientific journals accept PNG; they typically request that files 
     2354  have a resolution of at least 1000 DPI. 
     2355"""), 
     2356    direction=u'Output', 
     2357    arcGISDisplayName=_(u'Output diagnostic plot file'), 
     2358    arcGISCategory=_(u'Binary classification options')) 
     2359 
     2360AddArgumentMetadata(GLM.PredictFromArcGISTable, u'measure1', 
     2361    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'acc', u'cal', u'chisq', u'ecost', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rch', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']), 
     2362    description=_( 
     2363u"""The first performance measure to plot. 
     2364 
     2365This measure will serve as the Y coordinate for the plot. If a second 
     2366measure is specified, it will serve as the X coordinate. Otherwise the 
     2367model cutoff will serve as the X coordinate. 
     2368 
     2369When modeling a binary response, an important task is selecting a 
     2370cutoff. The cutoff is the value used to determine whether a given 
     2371predicted value of the response variable should be classified as 
     2372positive or negative. Because the model typically outputs predictions 
     2373along a continual range (e.g. between 0.0 and 1.0), you must determine 
     2374the portions of the range that represent positive and negative 
     2375responses. Response values less than the cutoff are classified as 
     2376negative and those greater than or equal to the cutoff are classified 
     2377as positive. 
     2378 
     2379Plots that use the cutoff as the X coordinate show you a measure of 
     2380the model's performance for the range of cutoff values. You can use 
     2381this this plot to select the cutoff value that maximizes, minimizes, 
     2382or otherwise optimizes a given performance measure. For example, if 
     2383you selected "acc" (accuracy) as the performance measure, you could 
     2384find the cutoff value that maximized the model's accuracy by finding 
     2385the highest point on the plot and then looking up the cutoff value on 
     2386the X axis. 
     2387 
     2388Plots that use a second performance measure as the X coordinate allow 
     2389you to balance one measure of the model's performance against another. 
     2390The plot will be color-coded by cutoff value, allowing you to look up 
     2391the cutoff for a given combination of the two performance measures. 
     2392Selecting an optimal cutoff will often involve making a tradeoff 
     2393between the two measures. 
     2394 
     2395For example, to create a classic receiver operating characteristic 
     2396(ROC) curve, select "tpr" (true positive rate) for the first measure 
     2397and "fpr" (false positive rate) for the second measure. 
     2398 
     2399For the first performance measure, you can select from the following 
     2400list. This list is taken from the documentation of the R ROCR package 
     2401that implements the plots. Some of them do not allow selection of a 
     2402second performance measure (you must omit it when invoking this tool), 
     2403as noted in the measure's description. 
     2404 
     2405In these descriptions, Y and Yhat are random variables representing 
     2406the class and the prediction for a randomly drawn sample, 
     2407respectively. + and - are the positive and negative class, 
     2408respectively. The following abbreviations are used for for empirical 
     2409quantities: P (# positive samples), N (# negative samples), TP (# true 
     2410positives), TN (# true negatives), FP (# false positives), FN (# false 
     2411negatives). 
     2412 
     2413* acc - Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). 
     2414 
     2415* cal - Calibration error. The calibration error is the absolute 
     2416  difference between predicted confidence and actual reliability. This 
     2417  error is estimated at all cutoffs by sliding a window of size 100 
     2418  across the range of possible cutoffs. E.g., if for several positive 
     2419  samples the output of the classifier is around 0.75, you might 
     2420  expect from a well-calibrated classifier that the fraction of them 
     2421  which is correctly predicted as positive is also around 0.75. In a 
     2422  well-calibrated classifier, the probabilistic confidence estimates 
     2423  are realistic. Only for use with probabilistic output (i.e. scores 
     2424  between 0 and 1; some of the other measures actually support values 
     2425  between -1 and 1). 
     2426 
     2427* chisq - Chi square test statistic. Note that R might raise a warning 
     2428  if the sample size is too small. 
     2429 
     2430* ecost - Expected cost. For details on cost curves, cf. Drummond & 
     2431  Holte 2000, 2004. ecost has an obligatory x axis, the so-called 
     2432  'probability-cost function'; thus you may not specify a second 
     2433  performance measure. 
     2434 
     2435* err - Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). 
     2436 
     2437* f - Precision-recall F measure (van Rijsbergen, 1979). Weighted 
     2438  harmonic mean of precision (P) and recall (R). F = 1/ (alpha*1/P + 
     2439  (1-alpha)*1/R). For this tool, alpha is always 1/2, so the mean is 
     2440  balanced. 
     2441 
     2442* fall - Fallout. Same as fpr.  
     2443 
     2444* fnr - False negative rate. P(Yhat = - | Y = +). Estimated as: FN/P. 
     2445 
     2446* fpr - False positive rate. P(Yhat = + | Y = -). Estimated as: FP/N. 
     2447 
     2448* lift - Lift value. P(Yhat = + | Y = +)/P(Yhat = +). 
     2449 
     2450* mat - Matthews correlation coefficient. Same as phi. 
     2451 
     2452* mi - Mutual information. I(Yhat, Y) := H(Y) - H(Y | Yhat), where H 
     2453  is the (conditional) entropy. Entropies are estimated naively (no 
     2454  bias correction). 
     2455 
     2456* miss - Miss. Same as fnr. 
     2457 
     2458* npv - Negative predictive value. P(Y = - | Yhat = -). Estimated as: 
     2459  TN/(TN+FN). 
     2460 
     2461* odds - Odds ratio. (TP*TN)/(FN*FP). Note that odds ratio produces 
     2462  Inf or NA values for all cutoffs corresponding to FN=0 or FP=0. This 
     2463  can substantially decrease the plotted cutoff region. 
     2464 
     2465* pcfall - Prediction-conditioned fallout. P(Y = - | Yhat = +). 
     2466  Estimated as: FP/(TP+FP). 
     2467 
     2468* pcmiss - Prediction-conditioned miss. P(Y = + | Yhat = -). Estimated 
     2469  as: FN/(TN+FN). 
     2470 
     2471* phi - Phi correlation coefficient. (TP*TN - 
     2472  FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN))). Yields a number 
     2473  between -1 and 1, with 1 indicating a perfect prediction, 0 indicating 
     2474  a random prediction. Values below 0 indicate a worse than random 
     2475  prediction. 
     2476 
     2477* ppv - Positive predictive value. P(Y = + | Yhat = +). Estimated as: 
     2478  TP/(TP+FP). 
     2479 
     2480* prec - Precision. Same as ppv.  
     2481 
     2482* rch - ROC convex hull. A ROC (=tpr vs fpr) curve with concavities 
     2483  (which represent suboptimal choices of cutoff) removed (Fawcett 
     2484  2001). Since the result is already a parametric performance curve, 
     2485  it cannot be used in combination with other measures (thus you may 
     2486  not specify a second performance measure). 
     2487 
     2488* rec - Recall. Same as tpr.  
     2489 
     2490* rnp - Rate of negative predictions. P(Yhat = -). Estimated as: 
     2491  (TN+FN)/(TP+FP+TN+FN). 
     2492 
     2493* rpp - Rate of positive predictions. P(Yhat = +). Estimated as: 
     2494  (TP+FP)/(TP+FP+TN+FN). 
     2495 
     2496* sar - Score combinining performance measures of different 
     2497  characteristics, in the attempt of creating a more "robust" measure 
     2498  (cf. Caruana R., ROCAI2004): SAR = 1/3 * (Accuracy + Area under the 
     2499  ROC curve + Root mean-squared error). 
     2500 
     2501* sens - Sensitivity. Same as tpr.  
     2502 
     2503* spec - Specificity. Same as tnr.  
     2504 
     2505* tnr - True negative rate. P(Yhat = - | Y = -). Estimated as: TN/N. 
     2506 
     2507* tpr - True positive rate. P(Yhat = + | Y = +). Estimated as: TP/P. 
     2508"""), 
     2509    arcGISDisplayName=_(u'First performance measure to plot'), 
     2510    arcGISCategory=_(u'Binary classification options')) 
     2511 
     2512AddArgumentMetadata(GLM.PredictFromArcGISTable, u'measure2', 
     2513    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'acc', u'cal', u'chisq', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']), 
     2514    description=_( 
     2515u"""The second performance measure to plot. 
     2516 
     2517If specified, this performance measure will be used as the X 
     2518coordinate of the plot instead of the model cutoff value. Please see 
     2519the documentation for the First Performance Measure for more 
     2520information."""), 
     2521    arcGISDisplayName=_(u'Second performance measure to plot'), 
     2522    arcGISCategory=_(u'Binary classification options')) 
     2523 
     2524AddArgumentMetadata(GLM.PredictFromArcGISTable, u'colorize', 
     2525    typeMetadata=BooleanTypeMetadata(), 
     2526    description=_( 
     2527u"""If True (the default) and a second performance measure is selected 
     2528for plotting, the plot will be colorized by the cutoff value. 
     2529Otherwise, the plot will be black."""), 
     2530    arcGISDisplayName=_(u'Colorize plot by cutoff value')) 
     2531 
     2532CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', GLM.PredictFromArcGISTable, u'res') 
     2533CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', GLM.PredictFromArcGISTable, u'width') 
     2534CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', GLM.PredictFromArcGISTable, u'height') 
     2535CopyArgumentMetadata(GLM.FitToArcGISTable, u'pointSize', GLM.PredictFromArcGISTable, u'pointSize') 
     2536CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', GLM.PredictFromArcGISTable, u'bg') 
     2537CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', GLM.PredictFromArcGISTable, u'overwriteExisting') 
     2538 
     2539AddResultMetadata(GLM.PredictFromArcGISTable, u'outputCutoff', 
     2540    typeMetadata=FloatTypeMetadata(canBeNone=True), 
     2541    description=_( 
     2542u"""Output cutoff value, either the value provided as input or, if one 
     2543was not provided, the value selected automatically."""), 
     2544    arcGISDisplayName=_(u'Output cutoff')) 
     2545 
    21222546# Public method: GLM.PredictFromArcGISRasters 
    21232547 
     
    21322556 
    21332557CopyArgumentMetadata(GLM.FitToArcGISTable, u'cls', GLM.PredictFromArcGISRasters, u'cls') 
    2134  
    2135 AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputModelFile', 
    2136     typeMetadata=FileTypeMetadata(mustExist=True), 
    2137     description=_( 
    2138 u"""File that contains the fitted model, generated by the Fit GLM 
    2139 tool."""), 
    2140     arcGISDisplayName=_(u'Input model file')) 
     2558CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'inputModelFile', GLM.PredictFromArcGISRasters, u'inputModelFile') 
    21412559 
    21422560AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster', 
     
    27343152CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', GAM.FitToArcGISTable, u'overwriteExisting') 
    27353153 
     3154# Public method: GAM.PredictFromArcGISTable 
     3155 
     3156AddMethodMetadata(GAM.PredictFromArcGISTable, 
     3157    shortDescription=_(u'Given a fitted generalized additive model (GAM) and a table containing fields for the predictor variables, this tool predicts the response variable for each row of the table.'), 
     3158    isExposedToPythonCallers=True, 
     3159    isExposedByCOM=True, 
     3160    isExposedAsArcGISTool=True, 
     3161    arcGISDisplayName=_(u'Predict GAM From Table'), 
     3162    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'), 
     3163    dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'caret')]) 
     3164 
     3165CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.PredictFromArcGISTable, u'cls') 
     3166 
     3167AddArgumentMetadata(GAM.PredictFromArcGISTable, u'inputModelFile', 
     3168    typeMetadata=FileTypeMetadata(mustExist=True), 
     3169    description=_( 
     3170u"""File that contains the fitted model, generated by the Fit GAM 
     3171tool."""), 
     3172    arcGISDisplayName=_(u'Input model file')) 
     3173 
     3174CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'inputTable', GAM.PredictFromArcGISTable, u'inputTable') 
     3175CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'actualValuesField', GAM.PredictFromArcGISTable, u'actualValuesField') 
     3176CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'predictedValuesField', GAM.PredictFromArcGISTable, u'predictedValuesField') 
     3177CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'cutoff', GAM.PredictFromArcGISTable, u'cutoff') 
     3178CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'where', GAM.PredictFromArcGISTable, u'where') 
     3179CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'ignoreOutOfRangeValues', GAM.PredictFromArcGISTable, u'ignoreOutOfRangeValues') 
     3180CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'noDataValue', GAM.PredictFromArcGISTable, u'noDataValue') 
     3181CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'outputPlotFile', GAM.PredictFromArcGISTable, u'outputPlotFile') 
     3182CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure1', GAM.PredictFromArcGISTable, u'measure1') 
     3183CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure2', GAM.PredictFromArcGISTable, u'measure2') 
     3184CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'colorize', GAM.PredictFromArcGISTable, u'colorize') 
     3185CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'res', GAM.PredictFromArcGISTable, u'res') 
     3186CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'width', GAM.PredictFromArcGISTable, u'width') 
     3187CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'height', GAM.PredictFromArcGISTable, u'height') 
     3188CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'pointSize', GAM.PredictFromArcGISTable, u'pointSize') 
     3189CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'bg', GAM.PredictFromArcGISTable, u'bg') 
     3190CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'overwriteExisting', GAM.PredictFromArcGISTable, u'overwriteExisting') 
     3191 
     3192CopyResultMetadata(GLM.PredictFromArcGISTable, u'outputCutoff', GAM.PredictFromArcGISTable, u'outputCutoff') 
     3193 
    27363194# Public method: GAM.PredictFromArcGISRasters 
    27373195 
     
    27463204 
    27473205CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.PredictFromArcGISRasters, u'cls') 
    2748  
    2749 AddArgumentMetadata(GAM.PredictFromArcGISRasters, u'inputModelFile', 
    2750     typeMetadata=FileTypeMetadata(mustExist=True), 
    2751     description=_( 
    2752 u"""File that contains the fitted model, generated by the Fit GAM 
    2753 tool."""), 
    2754     arcGISDisplayName=_(u'Input model file')) 
    2755  
     3206CopyArgumentMetadata(GAM.PredictFromArcGISTable, u'inputModelFile', GAM.PredictFromArcGISRasters, u'inputModelFile') 
    27563207CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster', GAM.PredictFromArcGISRasters, u'outputResponseRaster') 
    27573208CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', GAM.PredictFromArcGISRasters, u'templateRaster') 
     
    34973948CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', TreeModel.FitToArcGISTable, u'overwriteExisting') 
    34983949 
     3950# Public method: TreeModel.PredictFromArcGISTable 
     3951 
     3952AddMethodMetadata(TreeModel.PredictFromArcGISTable, 
     3953    shortDescription=_(u'Given a fitted tree model and a table containing fields for the predictor variables, this tool predicts the response variable for each row of the table.'), 
     3954    isExposedToPythonCallers=True, 
     3955    isExposedByCOM=True, 
     3956    isExposedAsArcGISTool=True, 
     3957    arcGISDisplayName=_(u'Predict Tree Model From Table'), 
     3958    arcGISToolCategory=_(u'Statistics\\Model Data\\Tree Models'), 
     3959    dependencies=[ArcGISDependency(9, 2), RDependency(2, 12, 0), RPackageDependency(u'rpart', u'3.1-48'), RPackageDependency(u'caret')]) 
     3960 
     3961CopyArgumentMetadata(TreeModel.FitToArcGISTable, u'cls', TreeModel.PredictFromArcGISTable, u'cls') 
     3962 
     3963AddArgumentMetadata(TreeModel.PredictFromArcGISTable, u'inputModelFile', 
     3964    typeMetadata=FileTypeMetadata(mustExist=True), 
     3965    description=_( 
     3966u"""File that contains the fitted model, generated by the Fit Tree 
     3967Model tool."""), 
     3968    arcGISDisplayName=_(u'Input model file')) 
     3969 
     3970CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'inputTable', TreeModel.PredictFromArcGISTable, u'inputTable') 
     3971CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'actualValuesField', TreeModel.PredictFromArcGISTable, u'actualValuesField') 
     3972CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'predictedValuesField', TreeModel.PredictFromArcGISTable, u'predictedValuesField') 
     3973CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'cutoff', TreeModel.PredictFromArcGISTable, u'cutoff') 
     3974CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'where', TreeModel.PredictFromArcGISTable, u'where') 
     3975CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'ignoreOutOfRangeValues', TreeModel.PredictFromArcGISTable, u'ignoreOutOfRangeValues') 
     3976CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'noDataValue', TreeModel.PredictFromArcGISTable, u'noDataValue') 
     3977CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'outputPlotFile', TreeModel.PredictFromArcGISTable, u'outputPlotFile') 
     3978CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure1', TreeModel.PredictFromArcGISTable, u'measure1') 
     3979CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure2', TreeModel.PredictFromArcGISTable, u'measure2') 
     3980CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'colorize', TreeModel.PredictFromArcGISTable, u'colorize') 
     3981CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'res', TreeModel.PredictFromArcGISTable, u'res') 
     3982CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'width', TreeModel.PredictFromArcGISTable, u'width') 
     3983CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'height', TreeModel.PredictFromArcGISTable, u'height') 
     3984CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'pointSize', TreeModel.PredictFromArcGISTable, u'pointSize') 
     3985CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'bg', TreeModel.PredictFromArcGISTable, u'bg') 
     3986CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'overwriteExisting', TreeModel.PredictFromArcGISTable, u'overwriteExisting') 
     3987 
     3988CopyResultMetadata(GLM.PredictFromArcGISTable, u'outputCutoff', TreeModel.PredictFromArcGISTable, u'outputCutoff') 
     3989 
    34993990# Public method: TreeModel.PredictFromArcGISRasters 
    35003991 
     
    35094000 
    35104001CopyArgumentMetadata(TreeModel.FitToArcGISTable, u'cls', TreeModel.PredictFromArcGISRasters, u'cls') 
    3511  
    3512 AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'inputModelFile', 
    3513     typeMetadata=FileTypeMetadata(mustExist=True), 
    3514     description=_( 
    3515 u"""File that contains the fitted model, generated by the Fit Tree 
    3516 Model tool."""), 
    3517     arcGISDisplayName=_(u'Input model file')) 
    3518  
     4002CopyArgumentMetadata(TreeModel.PredictFromArcGISTable, u'inputModelFile', TreeModel.PredictFromArcGISRasters, u'inputModelFile') 
    35194003CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster', TreeModel.PredictFromArcGISRasters, u'outputResponseRaster') 
    35204004CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', TreeModel.PredictFromArcGISRasters, u'templateRaster') 
     
    41884672CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', RandomForestModel.FitToArcGISTable, u'overwriteExisting') 
    41894673 
     4674# Public method: RandomForestModel.PredictFromArcGISTable 
     4675 
     4676AddMethodMetadata(RandomForestModel.PredictFromArcGISTable, 
     4677    shortDescription=_(u'Given a fitted random forest model and a table containing fields for the predictor variables, this tool predicts the response variable for each row of the table.'), 
     4678    isExposedToPythonCallers=True, 
     4679    isExposedByCOM=True, 
     4680    isExposedAsArcGISTool=True, 
     4681    arcGISDisplayName=_(u'Predict Random Forest From Table'), 
     4682    arcGISToolCategory=_(u'Statistics\\Model Data\\Random Forest Models'), 
     4683    dependencies=[ArcGISDependency(9, 2), RDependency(2, 9, 0), RPackageDependency(u'caret')]) 
     4684 
     4685CopyArgumentMetadata(RandomForestModel.FitToArcGISTable, u'cls', RandomForestModel.PredictFromArcGISTable, u'cls') 
     4686 
     4687AddArgumentMetadata(RandomForestModel.PredictFromArcGISTable, u'inputModelFile', 
     4688    typeMetadata=FileTypeMetadata(mustExist=True), 
     4689    description=_( 
     4690u"""File that contains the fitted model, generated by the Fit Random 
     4691Forest Model tool."""), 
     4692    arcGISDisplayName=_(u'Fitted model file')) 
     4693 
     4694CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'inputTable', RandomForestModel.PredictFromArcGISTable, u'inputTable') 
     4695CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'actualValuesField', RandomForestModel.PredictFromArcGISTable, u'actualValuesField') 
     4696CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'predictedValuesField', RandomForestModel.PredictFromArcGISTable, u'predictedValuesField') 
     4697CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'cutoff', RandomForestModel.PredictFromArcGISTable, u'cutoff') 
     4698CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'where', RandomForestModel.PredictFromArcGISTable, u'where') 
     4699CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'ignoreOutOfRangeValues', RandomForestModel.PredictFromArcGISTable, u'ignoreOutOfRangeValues') 
     4700CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'noDataValue', RandomForestModel.PredictFromArcGISTable, u'noDataValue') 
     4701CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'outputPlotFile', RandomForestModel.PredictFromArcGISTable, u'outputPlotFile') 
     4702CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure1', RandomForestModel.PredictFromArcGISTable, u'measure1') 
     4703CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'measure2', RandomForestModel.PredictFromArcGISTable, u'measure2') 
     4704CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'colorize', RandomForestModel.PredictFromArcGISTable, u'colorize') 
     4705CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'res', RandomForestModel.PredictFromArcGISTable, u'res') 
     4706CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'width', RandomForestModel.PredictFromArcGISTable, u'width') 
     4707CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'height', RandomForestModel.PredictFromArcGISTable, u'height') 
     4708CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'pointSize', RandomForestModel.PredictFromArcGISTable, u'pointSize') 
     4709CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'bg', RandomForestModel.PredictFromArcGISTable, u'bg') 
     4710CopyArgumentMetadata(GLM.PredictFromArcGISTable, u'overwriteExisting', RandomForestModel.PredictFromArcGISTable, u'overwriteExisting') 
     4711 
     4712CopyResultMetadata(GLM.PredictFromArcGISTable, u'outputCutoff', RandomForestModel.PredictFromArcGISTable, u'outputCutoff') 
     4713 
    41904714# Public method: RandomForestModel.PredictFromArcGISRasters 
    41914715 
     
    42004724 
    42014725CopyArgumentMetadata(RandomForestModel.FitToArcGISTable, u'cls', RandomForestModel.PredictFromArcGISRasters, u'cls') 
    4202  
    4203 AddArgumentMetadata(RandomForestModel.PredictFromArcGISRasters, u'inputModelFile', 
    4204     typeMetadata=FileTypeMetadata(mustExist=True), 
    4205     description=_( 
    4206 u"""File that contains the fitted model, generated by the Fit Random 
    4207 Forest Model tool."""), 
    4208     arcGISDisplayName=_(u'Fitted model file')) 
     4726CopyArgumentMetadata(RandomForestModel.PredictFromArcGISTable, u'inputModelFile', RandomForestModel.PredictFromArcGISRasters, u'inputModelFile') 
    42094727 
    42104728AddArgumentMetadata(RandomForestModel.PredictFromArcGISRasters, u'outputResponseRaster', 
  • MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/SummarizeModel.r

    r859 r1000  
    6262                result <- result[, 1] 
    6363            else result[, 3] <- result[, 1]^(1/(2 * result[, 2])) 
     64             
     65            result <- as.matrix(result[order(result, decreasing=TRUE)]) 
     66            colnames(result) <- c("VIF") 
     67 
    6468            result 
    6569        } 
    6670 
     71        if (model$family$family %in% c("binomial", "poisson")) 
     72            test <- "Chisq" 
     73        else 
     74            test <- "F" 
     75 
    6776        if (length(labels(terms(model))) >= 2) 
    68             s = c(capture.output(summary(model), anova(model)), "", "Variance Inflation Factors:", "", capture.output(print(vif(model)))) 
     77            s = c(capture.output(summary(model), anova(model, test=test)),  
     78                  "", 
     79                  sprintf("Deviance explained = %.1f%%", (1 - model$deviance / model$null.deviance) * 100), 
     80                  "",  
     81                  "Variance Inflation Factors:",  
     82                  "",  
     83                  capture.output(print(vif(model)))) 
    6984        else 
    70             s = c(capture.output(summary(model), anova(model))) 
     85            s = c(capture.output(summary(model), anova(model, test=test)), 
     86                  "", 
     87                  sprintf("Deviance explained = %.1f%%", (1 - model$deviance / model$null.deviance) * 100)) 
    7188    } 
    7289