📚 Guides
Configuration

Configuration

Configuration file

The config/config.json file configures all steps of the pipeline. You can use the config/config.template.json file as a template to create your own configuration file.

To store the config.json file in an alternate location, see the metadata section.

💡

The full schema specification and description of each parameter can be found in the API Reference section.

Template:

{
    "version": "1.6",
    "general": {
        "metadata": {
            "github_repository": "tum-esm/em27-metadata-storage",
            "access_token": null
        },
        "data": {
            "ground_pressure": {
                "path": "path-to-ground-pressure-data",
                "file_regex": "^ground-pressure-$(SENSOR_ID)-$(YYYY)-$(MM)-$(DD)\\.csv$",
                "separator": ",",
                "datetime_column": null,
                "datetime_column_format": null,
                "date_column": "UTCdate_____",
                "date_column_format": "%Y-%m-%d",
                "time_column": "UTCtime_____",
                "time_column_format": "%H:%M:%S",
                "unix_timestamp_column": null,
                "unix_timestamp_column_format": null,
                "pressure_column": "pressure",
                "pressure_column_format": "hPa"
            },
            "atmospheric_profiles": "path-to-atmospheric-profiles",
            "interferograms": "path-to-ifg-upload-directory",
            "results": "path-to-results-storage"
        }
    },
    "profiles": {
        "server": {
            "email": "...@...",
            "max_parallel_requests": 25
        },
        "scope": {
            "from_date": "2022-01-01",
            "to_date": "2022-01-05",
            "models": [
                "GGG2014",
                "GGG2020"
            ]
        },
        "GGG2020_standard_sites": [
            {
                "identifier": "mu",
                "lat": 48.151,
                "lon": 11.569,
                "from_date": "2019-01-01",
                "to_date": "2099-12-31"
            }
        ]
    },
    "retrieval": {
        "general": {
            "max_process_count": 9,
            "ifg_file_regex": "^$(SENSOR_ID)$(DATE).*\\.\\d+$",
            "queue_verbosity": "compact",
            "container_dir": null
        },
        "jobs": [
            {
                "retrieval_algorithm": "proffast-1.0",
                "atmospheric_profile_model": "GGG2014",
                "sensor_ids": [
                    "ma",
                    "mb",
                    "mc",
                    "md",
                    "me"
                ],
                "from_date": "2019-01-01",
                "to_date": "2022-12-31",
                "settings": {
                    "store_binary_spectra": true,
                    "dc_min_threshold": 0.05,
                    "dc_var_threshold": 0.1,
                    "use_local_pressure_in_pcxs": true,
                    "use_ifg_corruption_filter": false,
                    "custom_ils": {
                        "ma": {
                            "channel1_me": 0.9892,
                            "channel1_pe": -0.001082,
                            "channel2_me": 0.9892,
                            "channel2_pe": -0.001082
                        }
                    },
                    "output_suffix": "template_config",
                    "pressure_calibration_factors": {
                        "mb": 0.999819
                    },
                    "pressure_calibration_offsets": {
                        "mb": -0.000125
                    }
                }
            },
            {
                "retrieval_algorithm": "proffast-2.3",
                "atmospheric_profile_model": "GGG2020",
                "sensor_ids": [
                    "ma",
                    "mb",
                    "mc",
                    "md",
                    "me"
                ],
                "from_date": "2019-01-01",
                "to_date": "2099-12-31",
                "settings": {
                    "store_binary_spectra": false,
                    "dc_min_threshold": 0.05,
                    "dc_var_threshold": 0.1,
                    "use_local_pressure_in_pcxs": false,
                    "use_ifg_corruption_filter": true,
                    "custom_ils": {},
                    "output_suffix": null,
                    "pressure_calibration_factors": {},
                    "pressure_calibration_offsets": {}
                }
            }
        ]
    },
    "bundles": [
        {
            "dst_dir": "directory-to-write-the-bundles-to",
            "output_formats": [
                "csv",
                "parquet"
            ],
            "from_datetime": "2022-01-01T00:00:00Z",
            "to_datetime": "2022-12-31T23:59:59Z",
            "retrieval_algorithms": [
                "proffast-1.0",
                "proffast-2.4"
            ],
            "atmospheric_profile_models": [
                "GGG2014",
                "GGG2020"
            ],
            "sensor_ids": [
                "ma",
                "mb",
                "mc",
                "md",
                "me"
            ],
            "bundle_suffix": null,
            "retrieval_job_output_suffix": null,
            "parse_dc_timeseries": true
        }
    ],
    "geoms": {
        "sensor_ids": [
            "ma",
            "mb",
            "mc",
            "md",
            "me"
        ],
        "retrieval_algorithms": [
            "proffast-1.0",
            "proffast-2.4"
        ],
        "atmospheric_profile_models": [
            "GGG2014",
            "GGG2020"
        ],
        "from_datetime": "2022-01-01T00:00:00Z",
        "to_datetime": "2022-12-31T23:59:59Z",
        "parse_dc_timeseries": false,
        "max_sza": 80.0,
        "min_xair": 0.98,
        "max_xair": 1.02,
        "conflict_mode": "replace"
    }
}

The next section describes the structure of the directories used by the pipeline: the data sources (interferograms, ground pressure, atmospheric profiles), the outputs (retrieval results, bundles, logs), and the internal directories (containers, etc.).