public final class InputDataConfig extends GeneratedMessageV3 implements InputDataConfigOrBuilder
   
   Specifies Vertex AI owned input data to be used for training, and
 possibly evaluating, the Model.
 Protobuf type google.cloud.aiplatform.v1.InputDataConfig
 
  
  
  
  Static Fields
  
  
  
    public static final int ANNOTATIONS_FILTER_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int ANNOTATION_SCHEMA_URI_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int BIGQUERY_DESTINATION_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int DATASET_ID_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int FILTER_SPLIT_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int FRACTION_SPLIT_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int GCS_DESTINATION_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int PERSIST_ML_USE_ASSIGNMENT_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int PREDEFINED_SPLIT_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int SAVED_QUERY_ID_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int STRATIFIED_SPLIT_FIELD_NUMBER
   
  Field Value
  
  
  
    public static final int TIMESTAMP_SPLIT_FIELD_NUMBER
   
  Field Value
  
  Static Methods
  
  
  
  
    public static InputDataConfig getDefaultInstance()
   
  Returns
  
  
  
  
    public static final Descriptors.Descriptor getDescriptor()
   
  Returns
  
  
  
  
    public static InputDataConfig.Builder newBuilder()
   
  Returns
  
  
  
  
    public static InputDataConfig.Builder newBuilder(InputDataConfig prototype)
   
  Parameter
  
  Returns
  
  
  
  
    public static InputDataConfig parseDelimitedFrom(InputStream input)
   
  Parameter
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseDelimitedFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(byte[] data)
   
  Parameter
  
    
      
        | Name | 
        Description | 
      
      
        data | 
        byte[]
  | 
      
    
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(byte[] data, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(ByteString data)
   
  Parameter
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(ByteString data, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(CodedInputStream input)
   
  Parameter
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(InputStream input)
   
  Parameter
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(InputStream input, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(ByteBuffer data)
   
  Parameter
  
  Returns
  
  Exceptions
  
  
  
  
    public static InputDataConfig parseFrom(ByteBuffer data, ExtensionRegistryLite extensionRegistry)
   
  Parameters
  
  Returns
  
  Exceptions
  
  
  
  
    public static Parser<InputDataConfig> parser()
   
  Returns
  
  Methods
  
  
  
  
    public boolean equals(Object obj)
   
  Parameter
  
  Returns
  
  Overrides
  
  
  
  
    public String getAnnotationSchemaUri()
   
   Applicable only to custom training with Datasets that have DataItems and
 Annotations.
 Cloud Storage URI that points to a YAML file describing the annotation
 schema. The schema is defined as an OpenAPI 3.0.2 Schema
 Object.
 The schema files that can be used here are found in
 gs://google-cloud-aiplatform/schema/dataset/annotation/ , note that the
 chosen schema must be consistent with
 metadata of the
 Dataset specified by
 dataset_id.
 Only Annotations that both match this schema and belong to DataItems not
 ignored by the split method are used for training, validation or
 testing, depending on the role of the DataItem they are on.
 When used in conjunction with
 annotations_filter,
 the Annotations used for training are filtered by both
 annotations_filter
 and
 annotation_schema_uri.
 string annotation_schema_uri = 9;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        String | 
        The annotationSchemaUri. 
 | 
      
    
  
  
  
  
    public ByteString getAnnotationSchemaUriBytes()
   
   Applicable only to custom training with Datasets that have DataItems and
 Annotations.
 Cloud Storage URI that points to a YAML file describing the annotation
 schema. The schema is defined as an OpenAPI 3.0.2 Schema
 Object.
 The schema files that can be used here are found in
 gs://google-cloud-aiplatform/schema/dataset/annotation/ , note that the
 chosen schema must be consistent with
 metadata of the
 Dataset specified by
 dataset_id.
 Only Annotations that both match this schema and belong to DataItems not
 ignored by the split method are used for training, validation or
 testing, depending on the role of the DataItem they are on.
 When used in conjunction with
 annotations_filter,
 the Annotations used for training are filtered by both
 annotations_filter
 and
 annotation_schema_uri.
 string annotation_schema_uri = 9;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        ByteString | 
        The bytes for annotationSchemaUri. 
 | 
      
    
  
  
  
  
    public String getAnnotationsFilter()
   
   Applicable only to Datasets that have DataItems and Annotations.
 A filter on Annotations of the Dataset. Only Annotations that both
 match this filter and belong to DataItems not ignored by the split method
 are used for training, validation or testing, depending on
 the role of the DataItem they are on (for auto-assigned DataItems, that role is
 decided by Vertex AI). A filter with the same syntax as the one used in
 ListAnnotations
 may be used, but note here it filters across all Annotations of the
 Dataset, and not just within a single DataItem.
 string annotations_filter = 6;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        String | 
        The annotationsFilter. 
 | 
      
    
  
  
  
  
    public ByteString getAnnotationsFilterBytes()
   
   Applicable only to Datasets that have DataItems and Annotations.
 A filter on Annotations of the Dataset. Only Annotations that both
 match this filter and belong to DataItems not ignored by the split method
 are used for training, validation or testing, depending on
 the role of the DataItem they are on (for auto-assigned DataItems, that role is
 decided by Vertex AI). A filter with the same syntax as the one used in
 ListAnnotations
 may be used, but note here it filters across all Annotations of the
 Dataset, and not just within a single DataItem.
 string annotations_filter = 6;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        ByteString | 
        The bytes for annotationsFilter. 
 | 
      
    
  
  
  
  
    public BigQueryDestination getBigqueryDestination()
   
   Only applicable to custom training with tabular Dataset with BigQuery
 source.
 The BigQuery project location where the training data is to be written
 to. In the given project a new dataset is created with name
 dataset_<dataset-id>_<annotation-type>_<timestamp-of-training-call>
 where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All training
 input data is written into that dataset. In the dataset three
 tables are created, training, validation and test.
- AIP_DATA_FORMAT = "bigquery".
 
- AIP_TRAINING_DATA_URI  =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.training"
 
- AIP_VALIDATION_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.validation"
 
- AIP_TEST_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.test"
 
 .google.cloud.aiplatform.v1.BigQueryDestination bigquery_destination = 10;
 
  Returns
  
  
  
  
    public BigQueryDestinationOrBuilder getBigqueryDestinationOrBuilder()
   
   Only applicable to custom training with tabular Dataset with BigQuery
 source.
 The BigQuery project location where the training data is to be written
 to. In the given project a new dataset is created with name
 dataset_<dataset-id>_<annotation-type>_<timestamp-of-training-call>
 where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All training
 input data is written into that dataset. In the dataset three
 tables are created, training, validation and test.
- AIP_DATA_FORMAT = "bigquery".
 
- AIP_TRAINING_DATA_URI  =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.training"
 
- AIP_VALIDATION_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.validation"
 
- AIP_TEST_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.test"
 
 .google.cloud.aiplatform.v1.BigQueryDestination bigquery_destination = 10;
 
  Returns
  
  
  
  
    public String getDatasetId()
   
 Required. The ID of the Dataset in the same Project and Location whose data
 will be used to train the Model. The Dataset must use a schema compatible
 with the Model being trained, and what is compatible should be described in the
 used TrainingPipeline's training_task_definition
 (google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition).
 For tabular Datasets, all their data is exported to training, to pick
 and choose from.
 string dataset_id = 1 [(.google.api.field_behavior) = REQUIRED];
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        String | 
        The datasetId. 
 | 
      
    
  
  
  
  
    public ByteString getDatasetIdBytes()
   
 Required. The ID of the Dataset in the same Project and Location whose data
 will be used to train the Model. The Dataset must use a schema compatible
 with the Model being trained, and what is compatible should be described in the
 used TrainingPipeline's training_task_definition
 (google.cloud.aiplatform.v1.TrainingPipeline.training_task_definition).
 For tabular Datasets, all their data is exported to training, to pick
 and choose from.
 string dataset_id = 1 [(.google.api.field_behavior) = REQUIRED];
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        ByteString | 
        The bytes for datasetId. 
 | 
      
    
  
  
  
  
    public InputDataConfig getDefaultInstanceForType()
   
  Returns
  
  
  
  
    public InputDataConfig.DestinationCase getDestinationCase()
   
  Returns
  
  
  
  
    public FilterSplit getFilterSplit()
   
   Split based on the provided filters for each set.
 .google.cloud.aiplatform.v1.FilterSplit filter_split = 3;
 
  Returns
  
  
  
  
    public FilterSplitOrBuilder getFilterSplitOrBuilder()
   
   Split based on the provided filters for each set.
 .google.cloud.aiplatform.v1.FilterSplit filter_split = 3;
 
  Returns
  
  
  
  
    public FractionSplit getFractionSplit()
   
   Split based on fractions defining the size of each set.
 .google.cloud.aiplatform.v1.FractionSplit fraction_split = 2;
 
  Returns
  
  
  
  
    public FractionSplitOrBuilder getFractionSplitOrBuilder()
   
   Split based on fractions defining the size of each set.
 .google.cloud.aiplatform.v1.FractionSplit fraction_split = 2;
 
  Returns
  
  
  
  
    public GcsDestination getGcsDestination()
   
   The Cloud Storage location where the training data is to be
 written to. In the given directory a new directory is created with
 name:
 dataset-<dataset-id>-<annotation-type>-<timestamp-of-training-call>
 where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
 All training input data is written into that directory.
 The Vertex AI environment variables representing Cloud Storage
 data URIs are represented in the Cloud Storage wildcard
 format to support sharded data. e.g.: "gs://.../training-*.jsonl"
- AIP_DATA_FORMAT = "jsonl" for non-tabular data, "csv" for tabular data
 
- AIP_TRAINING_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/training-*.${AIP_DATA_FORMAT}"
 
- AIP_VALIDATION_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/validation-*.${AIP_DATA_FORMAT}"
 
- AIP_TEST_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/test-*.${AIP_DATA_FORMAT}"
 
 .google.cloud.aiplatform.v1.GcsDestination gcs_destination = 8;
 
  Returns
  
  
  
  
    public GcsDestinationOrBuilder getGcsDestinationOrBuilder()
   
   The Cloud Storage location where the training data is to be
 written to. In the given directory a new directory is created with
 name:
 dataset-<dataset-id>-<annotation-type>-<timestamp-of-training-call>
 where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
 All training input data is written into that directory.
 The Vertex AI environment variables representing Cloud Storage
 data URIs are represented in the Cloud Storage wildcard
 format to support sharded data. e.g.: "gs://.../training-*.jsonl"
- AIP_DATA_FORMAT = "jsonl" for non-tabular data, "csv" for tabular data
 
- AIP_TRAINING_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/training-*.${AIP_DATA_FORMAT}"
 
- AIP_VALIDATION_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/validation-*.${AIP_DATA_FORMAT}"
 
- AIP_TEST_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/test-*.${AIP_DATA_FORMAT}"
 
 .google.cloud.aiplatform.v1.GcsDestination gcs_destination = 8;
 
  Returns
  
  
  
  
    public Parser<InputDataConfig> getParserForType()
   
  Returns
  
  Overrides
  
  
  
  
    public boolean getPersistMlUseAssignment()
   
   Whether to persist the ML use assignment to data item system labels.
 bool persist_ml_use_assignment = 11;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        The persistMlUseAssignment. 
 | 
      
    
  
  
  
  
    public PredefinedSplit getPredefinedSplit()
   
   Supported only for tabular Datasets.
 Split based on a predefined key.
 .google.cloud.aiplatform.v1.PredefinedSplit predefined_split = 4;
 
  Returns
  
  
  
  
    public PredefinedSplitOrBuilder getPredefinedSplitOrBuilder()
   
   Supported only for tabular Datasets.
 Split based on a predefined key.
 .google.cloud.aiplatform.v1.PredefinedSplit predefined_split = 4;
 
  Returns
  
  
  
  
    public String getSavedQueryId()
   
   Only applicable to Datasets that have SavedQueries.
 The ID of a SavedQuery (annotation set) under the Dataset specified by
 dataset_id used
 for filtering Annotations for training.
 Only Annotations that are associated with this SavedQuery are used in
 training. When used in conjunction with
 annotations_filter,
 the Annotations used for training are filtered by both
 saved_query_id
 and
 annotations_filter.
 Only one of
 saved_query_id
 and
 annotation_schema_uri
 should be specified as both of them represent the same thing: problem type.
 string saved_query_id = 7;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        String | 
        The savedQueryId. 
 | 
      
    
  
  
  
  
    public ByteString getSavedQueryIdBytes()
   
   Only applicable to Datasets that have SavedQueries.
 The ID of a SavedQuery (annotation set) under the Dataset specified by
 dataset_id used
 for filtering Annotations for training.
 Only Annotations that are associated with this SavedQuery are used in
 training. When used in conjunction with
 annotations_filter,
 the Annotations used for training are filtered by both
 saved_query_id
 and
 annotations_filter.
 Only one of
 saved_query_id
 and
 annotation_schema_uri
 should be specified as both of them represent the same thing: problem type.
 string saved_query_id = 7;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        ByteString | 
        The bytes for savedQueryId. 
 | 
      
    
  
  
  
  
    public int getSerializedSize()
   
  Returns
  
  Overrides
  
  
  
  
    public InputDataConfig.SplitCase getSplitCase()
   
  Returns
  
  
  
  
    public StratifiedSplit getStratifiedSplit()
   
   Supported only for tabular Datasets.
 Split based on the distribution of the specified column.
 .google.cloud.aiplatform.v1.StratifiedSplit stratified_split = 12;
 
  Returns
  
  
  
  
    public StratifiedSplitOrBuilder getStratifiedSplitOrBuilder()
   
   Supported only for tabular Datasets.
 Split based on the distribution of the specified column.
 .google.cloud.aiplatform.v1.StratifiedSplit stratified_split = 12;
 
  Returns
  
  
  
  
    public TimestampSplit getTimestampSplit()
   
   Supported only for tabular Datasets.
 Split based on the timestamp of the input data pieces.
 .google.cloud.aiplatform.v1.TimestampSplit timestamp_split = 5;
 
  Returns
  
  
  
  
    public TimestampSplitOrBuilder getTimestampSplitOrBuilder()
   
   Supported only for tabular Datasets.
 Split based on the timestamp of the input data pieces.
 .google.cloud.aiplatform.v1.TimestampSplit timestamp_split = 5;
 
  Returns
  
  
  
  
    public final UnknownFieldSet getUnknownFields()
   
  Returns
  
  Overrides
  
  
  
  
    public boolean hasBigqueryDestination()
   
   Only applicable to custom training with tabular Dataset with BigQuery
 source.
 The BigQuery project location where the training data is to be written
 to. In the given project a new dataset is created with name
 dataset_<dataset-id>_<annotation-type>_<timestamp-of-training-call>
 where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All training
 input data is written into that dataset. In the dataset three
 tables are created, training, validation and test.
- AIP_DATA_FORMAT = "bigquery".
 
- AIP_TRAINING_DATA_URI  =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.training"
 
- AIP_VALIDATION_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.validation"
 
- AIP_TEST_DATA_URI =
"bigquery_destination.dataset_<dataset-id>_<annotation-type>_<time>.test"
 
 .google.cloud.aiplatform.v1.BigQueryDestination bigquery_destination = 10;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the bigqueryDestination field is set. 
 | 
      
    
  
  
  
  
    public boolean hasFilterSplit()
   
   Split based on the provided filters for each set.
 .google.cloud.aiplatform.v1.FilterSplit filter_split = 3;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the filterSplit field is set. 
 | 
      
    
  
  
  
  
    public boolean hasFractionSplit()
   
   Split based on fractions defining the size of each set.
 .google.cloud.aiplatform.v1.FractionSplit fraction_split = 2;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the fractionSplit field is set. 
 | 
      
    
  
  
  
  
    public boolean hasGcsDestination()
   
   The Cloud Storage location where the training data is to be
 written to. In the given directory a new directory is created with
 name:
 dataset-<dataset-id>-<annotation-type>-<timestamp-of-training-call>
 where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
 All training input data is written into that directory.
 The Vertex AI environment variables representing Cloud Storage
 data URIs are represented in the Cloud Storage wildcard
 format to support sharded data. e.g.: "gs://.../training-*.jsonl"
- AIP_DATA_FORMAT = "jsonl" for non-tabular data, "csv" for tabular data
 
- AIP_TRAINING_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/training-*.${AIP_DATA_FORMAT}"
 
- AIP_VALIDATION_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/validation-*.${AIP_DATA_FORMAT}"
 
- AIP_TEST_DATA_URI =
"gcs_destination/dataset-<dataset-id>-<annotation-type>-<time>/test-*.${AIP_DATA_FORMAT}"
 
 .google.cloud.aiplatform.v1.GcsDestination gcs_destination = 8;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the gcsDestination field is set. 
 | 
      
    
  
  
  
  
    public boolean hasPredefinedSplit()
   
   Supported only for tabular Datasets.
 Split based on a predefined key.
 .google.cloud.aiplatform.v1.PredefinedSplit predefined_split = 4;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the predefinedSplit field is set. 
 | 
      
    
  
  
  
  
    public boolean hasStratifiedSplit()
   
   Supported only for tabular Datasets.
 Split based on the distribution of the specified column.
 .google.cloud.aiplatform.v1.StratifiedSplit stratified_split = 12;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the stratifiedSplit field is set. 
 | 
      
    
  
  
  
  
    public boolean hasTimestampSplit()
   
   Supported only for tabular Datasets.
 Split based on the timestamp of the input data pieces.
 .google.cloud.aiplatform.v1.TimestampSplit timestamp_split = 5;
 
  Returns
  
    
      
        | Type | 
        Description | 
      
      
        boolean | 
        Whether the timestampSplit field is set. 
 | 
      
    
  
  
  
  
    public int hashCode()
   
  Returns
  
  Overrides
  
  
  
  
    protected GeneratedMessageV3.FieldAccessorTable internalGetFieldAccessorTable()
   
  Returns
  
  Overrides
  
  
  
  
    public final boolean isInitialized()
   
  Returns
  
  Overrides
  
  
  
  
    public InputDataConfig.Builder newBuilderForType()
   
  Returns
  
  
  
  
    protected InputDataConfig.Builder newBuilderForType(GeneratedMessageV3.BuilderParent parent)
   
  Parameter
  
  Returns
  
  Overrides
  
  
  
  
    protected Object newInstance(GeneratedMessageV3.UnusedPrivateParameter unused)
   
  Parameter
  
  Returns
  
  Overrides
  
  
  
  
    public InputDataConfig.Builder toBuilder()
   
  Returns
  
  
  
  
    public void writeTo(CodedOutputStream output)
   
  Parameter
  
  Overrides
  
  Exceptions