@misc{fan2025incorporatingmultivariateconsistencymlbased,
title = {Incorporating Multivariate Consistency in {ML}-Based Weather Forecasting with Latent-space Constraints},
author = {Hang Fan and Yi Xiao and Yongquan Qu and Fenghua Ling and Ben Fei and Lei Bai and Pierre Gentine},
year = {2025},
eprint = {2510.04006},
archiveprefix = {arXiv},
primaryclass = {cs.LG},
url = {https://arxiv.org/abs/2510.04006},
abstract = {Data-driven machine learning (ML) models have recently shown promise in surpassing traditional physics-based approaches for weather forecasting, leading to a so-called second revolution in weather forecasting. However, most ML-based forecast models treat reanalysis as the truth and are trained under variable-specific loss weighting, ignoring their physical coupling and spatial structure. Over long time horizons, the forecasts become blurry and physically unrealistic under rollout training. To address this, we reinterpret model training as a weak-constraint four-dimensional variational data assimilation (WC-4DVar) problem, treating reanalysis data as imperfect observations. This allows the loss function to incorporate reanalysis error covariance and capture multivariate dependencies. In practice, we compute the loss in a latent space learned by an autoencoder (AE), where the reanalysis error covariance becomes approximately diagonal, thus avoiding the need to explicitly model it in the high-dimensional model space. We show that rollout training with latent-space constraints improves long-term forecast skill and better preserves fine-scale structures and physical realism compared to training with model-space loss. Finally, we extend this framework to accommodate heterogeneous data sources, enabling the forecast model to be trained jointly on reanalysis and multi-source observations within a unified theoretical formulation.}
}
@misc{agrawal2025operationaldeeplearningsatellitebased,
  author        = {Shreya Agrawal and Mohammed Alewi Hassen and Emmanuel Asiedu Brempong and Boris Babenko and Fred Zyda and Olivia Graham and Di Li and Samier Merchant and Santiago Hincapie Potes and Tyler Russell and Danny Cheresnick and Aditya Prakash Kakkirala and Stephan Rasp and Avinatan Hassidim and Yossi Matias and Nal Kalchbrenner and Pramod Gupta and Jason Hickey and Aaron Bell},
  title         = {An Operational Deep Learning System for Satellite-Based High-Resolution Global Nowcasting},
  year          = {2025},
  eprint        = {2510.13050},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2510.13050},
  abstract      = {Precipitation nowcasting, which predicts rainfall up to a few hours ahead, is a critical tool for vulnerable communities in the Global South frequently exposed to intense, rapidly developing storms. Timely forecasts provide a crucial window to protect lives and livelihoods. Traditional numerical weather prediction (NWP) methods suffer from high latency, low spatial and temporal resolution, and significant gaps in accuracy across the world. Recent machine learning-based nowcasting methods, common in the Global North, cannot be extended to the Global South due to extremely sparse radar coverage. We present Global MetNet, an operational global machine learning nowcasting model. It leverages the Global Precipitation Mission's CORRA dataset, geostationary satellite data, and global NWP data to predict precipitation for the next 12 hours. The model operates at a high resolution of approximately 0.05° (~5km) spatially and 15 minutes temporally. Global MetNet significantly outperforms industry-standard hourly forecasts and achieves significantly higher skill, making forecasts useful over a much larger area of the world than previously available. Our model demonstrates better skill in data-sparse regions than even the best high-resolution NWP models achieve in the US. Validated using ground radar and satellite data, it shows significant improvements across key metrics like the critical success index and fractions skill score for all precipitation rates and lead times. Crucially, our model generates forecasts in under a minute, making it readily deployable for real-time applications. It is already deployed for millions of users on Google Search. This work represents a key step in reducing global disparities in forecast quality and integrating sparse, high-resolution satellite observations into weather forecasting.},
}
@misc{couairon2024archesweatherarchesweathergendeterministic,
title = {{ArchesWeather} \& {ArchesWeatherGen}: a deterministic and generative model for efficient {ML} weather forecasting},
author = {Guillaume Couairon and Renu Singh and Anastase Charantonis and Christian Lessig and Claire Monteleoni},
year = {2024},
eprint = {2412.12971},
archiveprefix = {arXiv},
primaryclass = {cs.LG},
url = {https://arxiv.org/abs/2412.12971},
abstract = {Weather forecasting plays a vital role in today's society, from agriculture and logistics to predicting the output of renewable energies, and preparing for extreme weather events. Deep learning weather forecasting models trained with the next state prediction objective on ERA5 have shown great success compared to numerical global circulation models. However, for a wide range of applications, being able to provide representative samples from the distribution of possible future weather states is critical. In this paper, we propose a methodology to leverage deterministic weather models in the design of probabilistic weather models, leading to improved performance and reduced computing costs. We first introduce {ArchesWeather}, a transformer-based deterministic model that improves upon Pangu-Weather by removing overrestrictive inductive priors. We then design a probabilistic weather model called {ArchesWeatherGen} based on flow matching, a modern variant of diffusion models, that is trained to project ArchesWeather's predictions to the distribution of ERA5 weather states. ArchesWeatherGen is a true stochastic emulator of ERA5 and surpasses IFS ENS and NeuralGCM on all WeatherBench headline variables (except for NeuralGCM's geopotential). Our work also aims to democratize the use of deterministic and generative machine learning models in weather forecasting research, with academic computing resources. All models are trained at 1.5$^\circ$ resolution, with a training budget of $\sim$9 V100 days for ArchesWeather and $\sim$45 V100 days for ArchesWeatherGen. For inference, ArchesWeatherGen generates 15-day weather trajectories at a rate of 1 minute per ensemble member on a A100 GPU card. To make our work fully reproducible, our code and models are open source, including the complete pipeline for data preparation, training, and evaluation.}
}
@misc{lippe2023pderefinerachievingaccuratelong,
title = {{PDE}-Refiner: Achieving Accurate Long Rollouts with Neural {PDE} Solvers},
author = {Phillip Lippe and Bastiaan S. Veeling and Paris Perdikaris and Richard E. Turner and Johannes Brandstetter},
year = {2023},
eprint = {2308.05732},
archiveprefix = {arXiv},
primaryclass = {cs.LG},
url = {https://arxiv.org/abs/2308.05732},
abstract = {Time-dependent partial differential equations (PDEs) are ubiquitous in science and engineering. Recently, mostly due to the high computational cost of traditional solution techniques, deep neural network based surrogates have gained increased interest. The practical utility of such neural PDE solvers relies on their ability to provide accurate, stable predictions over long time horizons, which is a notoriously hard problem. In this work, we present a large-scale analysis of common temporal rollout strategies, identifying the neglect of non-dominant spatial frequency information, often associated with high frequencies in PDE solutions, as the primary pitfall limiting stable, accurate rollout performance. Based on these insights, we draw inspiration from recent advances in diffusion models to introduce PDE-Refiner; a novel model class that enables more accurate modeling of all frequency components via a multistep refinement process. We validate PDE-Refiner on challenging benchmarks of complex fluid dynamics, demonstrating stable and accurate rollouts that consistently outperform state-of-the-art models, including neural, numerical, and hybrid neural-numerical architectures. We further demonstrate that PDE-Refiner greatly enhances data efficiency, since the denoising objective implicitly induces a novel form of spectral data augmentation. Finally, PDE-Refiner's connection to diffusion models enables an accurate and efficient assessment of the model's predictive uncertainty, allowing us to estimate when the surrogate becomes inaccurate.}
}
@misc{alet2025skillfuljointprobabilisticweather,
  author        = {Ferran Alet and Ilan Price and Andrew El-Kadi and Dominic Masters and Stratis Markou and Tom R. Andersson and Jacklynn Stott and Remi Lam and Matthew Willson and Alvaro Sanchez-Gonzalez and Peter Battaglia},
  title         = {Skillful joint probabilistic weather forecasting from marginals},
  year          = {2025},
  eprint        = {2506.10772},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2506.10772},
  abstract      = {Machine learning (ML)-based weather models have rapidly risen to prominence due to their greater accuracy and speed than traditional forecasts based on numerical weather prediction (NWP), recently outperforming traditional ensembles in global probabilistic weather forecasting. This paper presents FGN, a simple, scalable and flexible modeling approach which significantly outperforms the current state-of-the-art models. FGN generates ensembles via learned model-perturbations with an ensemble of appropriately constrained models. It is trained directly to minimize the continuous rank probability score (CRPS) of per-location forecasts. It produces state-of-the-art ensemble forecasts as measured by a range of deterministic and probabilistic metrics, makes skillful ensemble tropical cyclone track predictions, and captures joint spatial structure despite being trained only on marginals.},
}
@misc{price2024gencastdiffusionbasedensembleforecasting,
title = {{GenCast}: Diffusion-based ensemble forecasting for medium-range weather},
author = {Ilan Price and Alvaro Sanchez-Gonzalez and Ferran Alet and Tom R. Andersson and Andrew El-Kadi and Dominic Masters and Timo Ewalds and Jacklynn Stott and Shakir Mohamed and Peter Battaglia and Remi Lam and Matthew Willson},
year = {2024},
eprint = {2312.15796},
archiveprefix = {arXiv},
primaryclass = {cs.LG},
url = {https://arxiv.org/abs/2312.15796},
abstract = {Weather forecasts are fundamentally uncertain, so predicting the range of probable weather scenarios is crucial for important decisions, from warning the public about hazardous weather, to planning renewable energy use. Here, we introduce GenCast, a probabilistic weather model with greater skill and speed than the top operational medium-range weather forecast in the world, the European Centre for Medium-Range Forecasts (ECMWF)'s ensemble forecast, ENS. Unlike traditional approaches, which are based on numerical weather prediction (NWP), GenCast is a machine learning weather prediction (MLWP) method, trained on decades of reanalysis data. GenCast generates an ensemble of stochastic 15-day global forecasts, at 12-hour steps and 0.25 degree latitude-longitude resolution, for over 80 surface and atmospheric variables, in 8 minutes. It has greater skill than ENS on 97.4% of 1320 targets we evaluated, and better predicts extreme weather, tropical cyclones, and wind power production. This work helps open the next chapter in operational weather forecasting, where critical weather-dependent decisions are made with greater accuracy and efficiency.}
}
@article{JingrunChen2022,
author = {Chen, Jingrun and Chi, Xurong and E, Weinan and Yang, Zhouwang},
journal = {Journal of Machine Learning},
title = {Bridging Traditional and Machine Learning-Based Algorithms for Solving {PDEs}: The Random Feature Method},
year = {2022},
issn = {2790-203X},
month = jan,
note = {(math)},
number = {3},
pages = {268--298},
volume = {1},
doi = {10.4208/jml.220726},
abstract = {One of the oldest and most studied subject in scientific computing is algorithms for solving partial differential equations (PDEs). A long list of numerical methods have been proposed and successfully used for various applications. In recent years, deep learning methods have shown their superiority for high-dimensional PDEs where traditional methods fail. However, for low dimensional problems, it remains unclear whether these methods have a real advantage over traditional algorithms as a direct solver. In this work, we propose the random feature method (RFM) for solving PDEs, a natural bridge between traditional and machine learning-based algorithms. RFM is based on a combination of well-known ideas: 1. representation of the approximate solution using random feature functions; 2. collocation method to take care of the PDE; 3. the penalty method to treat the boundary conditions, which allows us to treat the boundary condition and the PDE in the same footing. We find it crucial to add several additional components including multi-scale representation and rescaling the weights in the loss function. We demonstrate that the method exhibits spectral accuracy and can compete with traditional solvers in terms of both accuracy and efficiency. In addition, we find that RFM is particularly suited for complex problems with complex geometry, where both traditional and machine learning-based algorithms encounter difficulties.},
publisher = {Global Science Press}
}
@article{Baek2024,
author = {Baek, Jonghyuk and Wang, Yanran and Chen, Jiun-Shyan},
journal = {Computer Methods in Applied Mechanics and Engineering},
title = {N-adaptive {Ritz} method: A neural network enriched partition of unity for boundary value problems},
year = {2024},
issn = {0045-7825},
month = aug,
note = {(math)},
pages = {117070},
volume = {428},
doi = {10.1016/j.cma.2024.117070},
abstract = {Conventional finite element methods are known to be tedious in adaptive refinements due to their conformal regularity requirements. Further, the enrichment functions for adaptive refinements are often not readily available in general applications. This work introduces a novel neural network-enriched Partition of Unity (NN-PU) approach for solving boundary value problems via artificial neural networks with a potential energy-based loss function minimization. The flexibility and adaptivity of the NN function space are utilized to capture complex solution patterns that the conventional Galerkin methods fail to capture. The NN enrichment is constructed by combining pre-trained feature-encoded NN blocks with an additional untrained NN block. The pre-trained NN blocks learn specific local features during the offline stage, enabling efficient enrichment of the approximation space during the online stage through the Ritz-type energy minimization. The NN enrichment is introduced under the Partition of Unity (PU) framework, ensuring convergence of the proposed method. The proposed NN-PU approximation and feature-encoded transfer learning form an adaptive approximation framework, termed the neural-refinement (n-refinement), for solving boundary value problems. Demonstrated by solving various elasticity problems, the proposed method offers accurate solutions while notably reducing the computational cost compared to the conventional adaptive refinement in the mesh-based methods.},
publisher = {Elsevier BV}
}
@inproceedings{Krauss2024,
  author    = {Krauss, Henrik and Habich, Tim-Lukas and Bartholdt, Max and Seel, Thomas and Schappler, Moritz},
  title     = {Domain-Decoupled Physics-informed Neural Networks with Closed-Form Gradients for Fast Model Learning of Dynamical Systems},
  booktitle = {Proceedings of the 21st International Conference on Informatics in Control, Automation and Robotics},
  year      = {2024},
  pages     = {55--66},
  publisher = {SCITEPRESS - Science and Technology Publications},
  doi       = {10.5220/0012935200003822},
  note      = {(math)},
  abstract  = {Physics-informed neural networks (PINNs) are trained using physical equations and can also incorporate unmodeled effects by learning from data. PINNs for control (PINCs) of dynamical systems are gaining interest due to their prediction speed compared to classical numerical integration methods for nonlinear state-space models, making them suitable for real-time control applications. We introduce the domain-decoupled physics-informed neural network (DD-PINN) to address current limitations of PINC in handling large and complex nonlinear dynamical systems. The time domain is decoupled from the feed-forward neural network to construct an Ansatz function, allowing for calculation of gradients in closed form. This approach significantly reduces training times, especially for large dynamical systems, compared to PINC, which relies on graph-based automatic differentiation. Additionally, the DD-PINN inherently fulfills the initial condition and supports higher-order excitation inputs, simplifying the training process and enabling improved prediction accuracy. Validation on three systems - a nonlinear mass-spring-damper, a five-mass-chain, and a two-link robot - demonstrates that the DD-PINN achieves significantly shorter training times. In cases where the PINC's prediction diverges, the DD-PINN's prediction remains stable and accurate due to higher physics loss reduction or use of a higher-order excitation input. The DD-PINN allows for fast and accurate learning of large dynamical systems previously out of reach for the PINC.},
}
@article{Satyadharma2025,
author = {Satyadharma, Adhika and Kan, Heng-Chuan and Chern, Ming-Jyh and Yu, Chun-Ying},
journal = {Computers \& Fluids},
title = {Numerical error estimation with physics informed neural network},
year = {2025},
issn = {0045-7930},
month = aug,
note = {(math)},
pages = {106700},
volume = {299},
abstract = {Quantifying numerical error has been a major issue in computational fluid dynamics, mostly due to its most
dominant term, the discretization error. Practically, this is the influence of the mesh and time step, which
can substantially affect the final result. However, due to its nature, quantifying discretization error typically
requires several fine mesh simulations, which can be very expensive to perform. In this research, we propose a
new way to calculate numerical error as a whole, which is done by utilizing physics-informed neural network
(PINN). By simultaneously referencing the discrete simulation data and the continuous governing equation,
PINN can detect any disagreement between the two and convert it into an estimate of the numerical error.
This study explains this framework and demonstrates it on several cases, including a one-dimensional heat
conduction, problem set with the method of manufactured solutions and a cavity flow simulation at Reynolds
number 1000. While it can be challenging to implement this framework on very fine mesh and it can only
evaluate a single type of variable at a time, it does offer two major benefits. The results show that our proposed
framework can reliably and accurately estimate the numerical error across a variety of mesh sizes, from a fine
mesh to a very coarse mesh, even if the data are outside the asymptotic range. It also requires only a single
simulation dataset, eliminating the need to perform several fine mesh simulations and proper mesh refinements.},
doi = {10.1016/j.compfluid.2025.106700},
publisher = {Elsevier BV}
}
@article{BarryStraume2025,
author = {Barry-Straume, Jostein and Sarshar, Arash and Popov, Andrey A. and Sandu, Adrian},
journal = {Communications on Applied Mathematics and Computation},
title = {Physics-Informed Neural Networks for {PDE}-Constrained Optimization and Control},
year = {2025},
issn = {2661-8893},
month = aug,
note = {(math)},
abstract = {The goal of optimal control is to determine a sequence of inputs for maximizing or minimizing a given performance criterion subject to the dynamics and constraints of the system under observation. This work introduces Control Physics-Informed Neural Networks (PINNs), which simultaneously learn both the system states and the optimal control signal in a single-stage framework that leverages the system's underlying physical laws. While prior approaches often follow a two-stage process---modeling the system first and then devising its control---the presented novel framework embeds the necessary optimality conditions directly into the network architecture and loss function. We demonstrate the effectiveness of the novel methodology by solving various open-loop optimal control problems governed by analytical, one-dimensional, and two-dimensional partial differential equations (PDEs).},
doi = {10.1007/s42967-025-00499-x},
publisher = {Springer Science and Business Media LLC}
}
@comment{{jabref-meta: databaseType:bibtex;}}
This file was generated by bibtex2html 1.99.