From 71d32d72ef887ec6eeba0621b9c9badf5d6ff0e8 Mon Sep 17 00:00:00 2001 From: Pierre HUBERT Date: Sun, 20 Jul 2025 18:07:22 +0200 Subject: [PATCH] Can extract date of expenses --- .../lib/routes/scan/scan_screen.dart | 10 ++-- moneymgr_mobile/lib/utils/ocr_utils.dart | 51 ++++++++++++++++--- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/moneymgr_mobile/lib/routes/scan/scan_screen.dart b/moneymgr_mobile/lib/routes/scan/scan_screen.dart index 5b1f008..884ef21 100644 --- a/moneymgr_mobile/lib/routes/scan/scan_screen.dart +++ b/moneymgr_mobile/lib/routes/scan/scan_screen.dart @@ -13,10 +13,10 @@ part 'scan_screen.g.dart'; /// Scan a document & return generated PDF as byte file @riverpod -Future<(Uint8List?, double?)> _scanDocument(Ref ref) async { +Future<(Uint8List?, BaseExpenseInfo?)> _scanDocument(Ref ref) async { final pdf = await scanDocAsPDF(); final img = await renderPdf(pdfBytes: pdf); - final amount = await extractTotalFromBill(img); + final amount = await extractInfoFromBill(img); return (pdf, amount); } @@ -42,11 +42,7 @@ class ScanScreen extends HookConsumerWidget { child: switch (scanDocProvider) { AsyncData(:final value) when value.$1 != null => ExpenseEditor( file: value.$1!, - initialData: BaseExpenseInfo( - label: null, - cost: value.$2 ?? 
0.0, - time: DateTime.now(), - ), + initialData: value.$2, onFinished: (expense) async { await expenses.add( info: expense, diff --git a/moneymgr_mobile/lib/utils/ocr_utils.dart b/moneymgr_mobile/lib/utils/ocr_utils.dart index bc6fe6e..f454f3c 100644 --- a/moneymgr_mobile/lib/utils/ocr_utils.dart +++ b/moneymgr_mobile/lib/utils/ocr_utils.dart @@ -5,9 +5,10 @@ import 'dart:ui' as ui; import 'package:flutter/material.dart'; import 'package:google_mlkit_text_recognition/google_mlkit_text_recognition.dart'; import 'package:logging/logging.dart'; +import 'package:moneymgr_mobile/services/storage/expenses.dart'; -/// Attempt to extract total amount from invoice image -Future extractTotalFromBill(Uint8List imgBuff) async { +/// Attempt to extract information from invoice image +Future extractInfoFromBill(Uint8List imgBuff) async { final decodedImage = await decodeImageFromList(imgBuff); final byteData = await decodedImage.toByteData( @@ -25,20 +26,54 @@ Future extractTotalFromBill(Uint8List imgBuff) async { Logger.root.fine("Expense text: ${extractionResult.text}"); - // Check for highest amount on invoice - final regexp = RegExp( + // Check for highestCost amount on invoice + final costRegexp = RegExp( r'([0-9]+([ ]*(\\.|,)[ ]*[0-9]{1,2}){0,1})([ \\t\\n]*(EUR|eur|€)|E)', multiLine: true, caseSensitive: false, ); - var highest = 0.0; - for (final match in regexp.allMatches(extractionResult.text)) { + var highestCost = 0.0; + for (final match in costRegexp.allMatches(extractionResult.text)) { if (match.groupCount == 0) continue; // Process only numeric value final value = (match.group(1) ?? "").replaceAll(",", "."); - highest = max(highest, double.tryParse(value) ?? 0.0); + highestCost = max(highestCost, double.tryParse(value) ?? 0.0); } - return highest == 0.0 ? 
null : highest; + // Check for newest date on invoice + final dateRegexp = RegExp( + r'([0-3][0-9])(\/|-)([0-1][0-9])(\/|-)(20[0-9]{2})', + multiLine: true, + caseSensitive: false, + ); + final currDate = DateTime.now(); + DateTime? newest; + for (final match in dateRegexp.allMatches(extractionResult.text)) { + if (match.groupCount < 5) continue; + + try { + final date = DateTime( + int.tryParse(match.group(5)!) ?? currDate.year, + int.tryParse(match.group(3)!) ?? currDate.month, + int.tryParse(match.group(1)!) ?? currDate.day, + ); + + if (newest == null) { + newest = date; + } else { + newest = DateTime.fromMillisecondsSinceEpoch( + max(newest.millisecondsSinceEpoch, date.millisecondsSinceEpoch), + ); + } + } catch (e, s) { + Logger.root.warning("Failed to parse date! $e$s"); + } + } + + return BaseExpenseInfo( + label: null, + cost: highestCost, + time: newest?.isBefore(currDate) ?? false ? newest! : currDate, + ); }