Browse Source

pdf可以解析出来了

master
hejl 1 month ago
parent
commit
c0e49549ab
  1. 4
      win_text_editor/lib/menus/app_menu.dart
  2. 5
      win_text_editor/lib/menus/menu_actions.dart
  3. 1
      win_text_editor/lib/menus/menu_constants.dart
  4. 5
      win_text_editor/lib/modules/module_router.dart
  5. 157
      win_text_editor/lib/modules/pdf_parse/controllers/pdf_parse_controller.dart
  6. 162
      win_text_editor/lib/modules/pdf_parse/controllers/pdfium_bindings.dart
  7. 61
      win_text_editor/lib/modules/pdf_parse/widgets/pdf_parse_output.dart
  8. 83
      win_text_editor/lib/modules/pdf_parse/widgets/pdf_parse_view.dart
  9. 14
      win_text_editor/pubspec.lock
  10. 3
      win_text_editor/pubspec.yaml
  11. 4
      win_text_editor/备忘.txt

4
win_text_editor/lib/menus/app_menu.dart

@ -51,6 +51,10 @@ class AppMenu extends StatelessWidget { @@ -51,6 +51,10 @@ class AppMenu extends StatelessWidget {
child: ListTile(leading: Icon(Icons.find_in_page), title: Text('XML搜索')),
),
const PopupMenuDivider(),
const PopupMenuItem<String>(
value: MenuConstants.pdfParse,
child: ListTile(leading: Icon(Icons.picture_as_pdf_rounded), title: Text('PDF解析')),
),
const PopupMenuItem<String>(
value: MenuConstants.demo,
child: ListTile(leading: Icon(Icons.view_agenda), title: Text('Demo')),

5
win_text_editor/lib/menus/menu_actions.dart

@ -17,6 +17,7 @@ class MenuActions { @@ -17,6 +17,7 @@ class MenuActions {
MenuConstants.dataCompare: _dataCompare,
MenuConstants.dataExtract: _dataExtract,
MenuConstants.xmlSearch: _xmlSearch,
MenuConstants.pdfParse: _pdfParse,
MenuConstants.memoryTable: _memoryTable,
MenuConstants.uftComponent: _uftComponent,
MenuConstants.callFunction: _callFunction,
@ -81,6 +82,10 @@ class MenuActions { @@ -81,6 +82,10 @@ class MenuActions {
await _openOrActivateTab(context, "XML搜索", RouterKey.xmlSearch, Icons.find_in_page);
}
static Future<void> _pdfParse(BuildContext context) async {
await _openOrActivateTab(context, "PDF解析", RouterKey.pdfParse, Icons.picture_as_pdf_rounded);
}
static Future<void> _demo(BuildContext context) async {
await _openOrActivateTab(context, "Demo", RouterKey.demo, Icons.code);
}

1
win_text_editor/lib/menus/menu_constants.dart

@ -19,6 +19,7 @@ class MenuConstants { @@ -19,6 +19,7 @@ class MenuConstants {
static const String dataCompare = 'data_compare';
static const String dataExtract = 'data_extract';
static const String xmlSearch = 'xml_search';
static const String pdfParse = 'pdf_parse';
static const String demo = 'demo';
// AIGC菜单项

5
win_text_editor/lib/modules/module_router.dart

@ -15,6 +15,8 @@ import 'package:win_text_editor/modules/demo/controllers/demo_controller.dart'; @@ -15,6 +15,8 @@ import 'package:win_text_editor/modules/demo/controllers/demo_controller.dart';
import 'package:win_text_editor/modules/demo/widgets/demo_view.dart';
import 'package:win_text_editor/modules/memory_table/controllers/memory_table_controller.dart';
import 'package:win_text_editor/modules/memory_table/widgets/memory_table_view.dart';
import 'package:win_text_editor/modules/pdf_parse/controllers/pdf_parse_controller.dart';
import 'package:win_text_editor/modules/pdf_parse/widgets/pdf_parse_view.dart';
import 'package:win_text_editor/modules/uft_component/controllers/uft_component_controller.dart';
import 'package:win_text_editor/modules/uft_component/widgets/uft_component_view.dart';
import 'package:win_text_editor/modules/xml_search/controllers/xml_search_controller.dart';
@ -33,6 +35,7 @@ class RouterKey { @@ -33,6 +35,7 @@ class RouterKey {
static const String dataCompare = 'data_compare';
static const String dataExtract = 'data_extract';
static const String xmlSearch = 'xml_search';
static const String pdfParse = 'pdf_parse';
static const String memoryTable = 'memory_table';
static const String uftComponent = 'uft_component';
static const String callFunction = 'call_function';
@ -50,6 +53,7 @@ class ModuleRouter { @@ -50,6 +53,7 @@ class ModuleRouter {
RouterKey.dataCompare: (tab) => DataCompareController(),
RouterKey.dataExtract: (tab) => DataExtractController(),
RouterKey.xmlSearch: (tab) => XmlSearchController(),
RouterKey.pdfParse: (tab) => PdfParseController(),
RouterKey.memoryTable: (tab) => MemoryTableController(),
RouterKey.uftComponent: (tab) => UftComponentController(),
RouterKey.callFunction: (tab) => CallFunctionController(),
@ -65,6 +69,7 @@ class ModuleRouter { @@ -65,6 +69,7 @@ class ModuleRouter {
RouterKey.dataCompare: (tab, controller) => DataCompareView(tabId: tab.id),
RouterKey.dataExtract: (tab, controller) => DataExtractView(tabId: tab.id),
RouterKey.xmlSearch: (tab, controller) => XmlSearchView(tabId: tab.id),
RouterKey.pdfParse: (tab, controller) => PdfParseView(tabId: tab.id),
RouterKey.memoryTable: (tab, controller) => MemoryTableView(tabId: tab.id),
RouterKey.uftComponent: (tab, controller) => UftComponentView(tabId: tab.id),
RouterKey.callFunction: (tab, controller) => CallFunctionView(tabId: tab.id),

157
win_text_editor/lib/modules/pdf_parse/controllers/pdf_parse_controller.dart

@ -0,0 +1,157 @@ @@ -0,0 +1,157 @@
import 'package:file_picker/file_picker.dart';
import 'package:win_text_editor/framework/controllers/logger.dart';
import 'package:win_text_editor/modules/outline/models/outline_node.dart';
import 'package:win_text_editor/shared/base/base_content_controller.dart';
// Import the PDFium bindings
import 'pdfium_bindings.dart';
/// Content controller for the PDF parsing tab.
///
/// Loads a PDF through [PdfDocument], concatenates the text of every page and
/// splits it heuristically into table rows and non-table lines. Listeners are
/// notified whenever the selected file, the extracted content or the error
/// message changes.
class PdfParseController extends BaseContentController {
  /// Section names that select the extracted tables in [genContentString].
  ///
  /// `'表格'` is the historical key; `'表格内'` is the label offered by the
  /// output panel, so both are accepted.
  static const Set<String> _tableSectionKeys = {'表格', '表格内'};

  /// Key under which the non-table text is stored in [contentSections].
  static const String _nonTableSectionKey = '表格外';

  /// Minimum number of consecutive multi-column lines that form a table.
  static const int _minTableRows = 2;

  /// Column separator: a tab, or a run of two or more whitespace characters.
  static final RegExp _columnSeparator = RegExp(r'\t|\s{2,}');

  String _filePath = '';
  String? _errorMessage;
  PdfDocument? _pdfDocument;
  final Map<String, String> _contentSections = {};

  /// Extracted tables, indexed as table -> row -> cell.
  final List<List<List<String>>> _tables = [];
  final List<String> _nonTableContent = [];

  /// Path of the currently selected PDF file; empty until one is chosen.
  String get filePath => _filePath;

  /// Message of the last failed load, or `null` if the last load succeeded.
  String? get errorMessage => _errorMessage;

  /// Named text sections extracted from the document.
  Map<String, String> get contentSections => _contentSections;

  /// Extracted tables, indexed as table -> row -> cell.
  List<List<List<String>>> get tables => _tables;

  @override
  void dispose() {
    _pdfDocument?.dispose();
    _pdfDocument = null;
    super.dispose();
  }

  /// Lets the user pick a `.pdf` file and loads it.
  ///
  /// Does nothing when the dialog is cancelled or the picked entry has no
  /// file-system path.
  Future<void> pickFile() async {
    final result = await FilePicker.platform.pickFiles(
      type: FileType.custom,
      allowedExtensions: ['pdf'],
    );
    final path = result?.files.single.path;
    if (path == null) {
      return;
    }
    await setFilePath(path);
  }

  /// Selects [path] as the current file and loads its content.
  Future<void> setFilePath(String path) async {
    _filePath = path;
    notifyListeners();
    await _loadPdfContent();
  }

  /// Opens [_filePath], extracts its text and rebuilds sections and tables.
  ///
  /// Failures are reported through [errorMessage] and the logger instead of
  /// being thrown. Listeners are notified in both cases.
  Future<void> _loadPdfContent() async {
    // A previous failure must not stay visible after a successful reload.
    _errorMessage = null;
    try {
      // Release the previously opened document before loading the new one.
      _pdfDocument?.dispose();
      _pdfDocument = null;

      final document = PdfDocument.fromFile(_filePath);
      _pdfDocument = document;

      // Read the page count once; each access is a native call.
      final pageCount = document.pageCount;
      final allText = StringBuffer();
      for (var i = 0; i < pageCount; i++) {
        allText
          ..write(document.getPageText(i))
          ..write('\n');
      }
      _extractDocumentSections(allText.toString());
    } catch (e) {
      // Deliberately broad: any failure (including a missing native library)
      // is surfaced to the user rather than crashing the tab.
      // Drop content of the previously loaded file so it is not shown under
      // the path of the file that failed to load.
      _clearExtractedContent();
      final message = 'Failed to load PDF: $e';
      _errorMessage = message;
      Logger().error(message);
    }
    notifyListeners();
  }

  /// Removes all extracted sections, tables and non-table lines.
  void _clearExtractedContent() {
    _contentSections.clear();
    _tables.clear();
    _nonTableContent.clear();
  }

  /// Rebuilds [_tables] and [_contentSections] from the document text.
  void _extractDocumentSections(String allText) {
    _clearExtractedContent();

    // Work on trimmed, non-empty lines only.
    final lines = allText
        .split('\n')
        .map((line) => line.trim())
        .where((line) => line.isNotEmpty)
        .toList();

    _extractTablesAndNonTables(lines);

    _contentSections[_nonTableSectionKey] = _nonTableContent.join('\n');
  }

  /// Splits [lines] into tables and non-table content.
  ///
  /// A line that splits into two or more columns is a candidate table row. A
  /// run of at least [_minTableRows] consecutive candidate rows becomes a
  /// table; a shorter run is kept as non-table text so no content is lost.
  void _extractTablesAndNonTables(List<String> lines) {
    var pendingRows = <List<String>>[];
    var pendingLines = <String>[];

    // Commits the current run either as a table or as plain lines.
    void flushPending() {
      if (pendingRows.length >= _minTableRows) {
        _tables.add(pendingRows);
      } else {
        _nonTableContent.addAll(pendingLines);
      }
      pendingRows = [];
      pendingLines = [];
    }

    for (final line in lines) {
      final columns =
          line.split(_columnSeparator).where((e) => e.isNotEmpty).toList();
      if (columns.length >= 2) {
        pendingRows.add(columns);
        pendingLines.add(line);
      } else {
        flushPending();
        _nonTableContent.add(line);
      }
    }
    // The document may end while a run is still open.
    flushPending();
  }

  /// Renders the requested [sections] as display text.
  ///
  /// `'表格'` / `'表格内'` render every extracted table with numbered rows and
  /// `' | '` between cells; any other name is looked up in [contentSections].
  /// Unknown or empty sections are skipped. Returns `null` when nothing was
  /// written.
  String? genContentString(List<String> sections) {
    final buffer = StringBuffer();

    for (final section in sections) {
      if (_tableSectionKeys.contains(section) && _tables.isNotEmpty) {
        buffer.writeln('===== 表格内容 =====');
        for (var tableIndex = 0; tableIndex < _tables.length; tableIndex++) {
          buffer.writeln('----- 表格 ${tableIndex + 1} -----');
          final table = _tables[tableIndex];
          for (var rowIndex = 0; rowIndex < table.length; rowIndex++) {
            buffer.writeln('${rowIndex + 1}: ${table[rowIndex].join(' | ')}');
          }
          buffer.writeln();
        }
        buffer.writeln();
      } else if (_contentSections.containsKey(section)) {
        buffer.writeln('===== $section =====');
        buffer.writeln(_contentSections[section]);
        buffer.writeln();
      }
    }

    return buffer.isEmpty ? null : buffer.toString();
  }

  /// Loads [filePath] as the current PDF; [appendArg] is not used.
  @override
  Future<void> onOpenFile(String filePath, {dynamic appendArg}) async {
    await setFilePath(filePath);
  }

  /// No-op: this controller does nothing when a folder is opened.
  @override
  void onOpenFolder(String folderPath) {}

  /// No-op: this controller does nothing when an outline node is dropped.
  @override
  void onDropOutlineNode(OutlineNode node) {}
}

162
win_text_editor/lib/modules/pdf_parse/controllers/pdfium_bindings.dart

@ -0,0 +1,162 @@ @@ -0,0 +1,162 @@
import 'dart:ffi';
import 'dart:io';
import 'package:ffi/ffi.dart';
// PDFium FFI bindings.

/// Handle to the PDFium shared library.
///
/// Top-level variables are initialized lazily, so the library is opened the
/// first time this handle is read.
final DynamicLibrary pdfiumLib = _loadPdfiumLibrary();

/// Opens the PDFium shared library under its platform-specific file name.
///
/// Throws [UnsupportedError] on any platform other than Windows, macOS and
/// Linux.
DynamicLibrary _loadPdfiumLibrary() {
  final String libraryName;
  if (Platform.isWindows) {
    libraryName = 'pdfium.dll';
  } else if (Platform.isMacOS) {
    libraryName = 'libpdfium.dylib';
  } else if (Platform.isLinux) {
    libraryName = 'libpdfium.so';
  } else {
    throw UnsupportedError('Unsupported platform');
  }
  return DynamicLibrary.open(libraryName);
}
/// Raw `dart:ffi` bindings for the subset of the PDFium C API used here.
///
/// Each field is resolved from [pdfiumLib] the first time it is accessed.
/// Member names mirror the C symbol names. Document, page and text-page
/// handles are passed around as opaque `Pointer<Void>` values.
class PDFium {
  /// `void FPDF_InitLibrary()`.
  static final FPDF_InitLibrary =
      pdfiumLib.lookupFunction<Void Function(), void Function()>(
    'FPDF_InitLibrary',
  );

  /// `void FPDF_DestroyLibrary()`.
  static final FPDF_DestroyLibrary =
      pdfiumLib.lookupFunction<Void Function(), void Function()>(
    'FPDF_DestroyLibrary',
  );

  /// `FPDF_DOCUMENT FPDF_LoadDocument(file_path, password)`.
  ///
  /// Both arguments are NUL-terminated byte strings; the result is `nullptr`
  /// on failure.
  static final FPDF_LoadDocument = pdfiumLib.lookupFunction<
      Pointer<Void> Function(Pointer<Utf8>, Pointer<Utf8>),
      Pointer<Void> Function(Pointer<Utf8>, Pointer<Utf8>)>(
    'FPDF_LoadDocument',
  );

  /// `void FPDF_CloseDocument(document)`.
  static final FPDF_CloseDocument = pdfiumLib.lookupFunction<
      Void Function(Pointer<Void>), void Function(Pointer<Void>)>(
    'FPDF_CloseDocument',
  );

  /// `int FPDF_GetPageCount(document)`.
  static final FPDF_GetPageCount = pdfiumLib.lookupFunction<
      Int32 Function(Pointer<Void>), int Function(Pointer<Void>)>(
    'FPDF_GetPageCount',
  );

  /// `FPDF_PAGE FPDF_LoadPage(document, page_index)`.
  static final FPDF_LoadPage = pdfiumLib.lookupFunction<
      Pointer<Void> Function(Pointer<Void>, Int32),
      Pointer<Void> Function(Pointer<Void>, int)>(
    'FPDF_LoadPage',
  );

  /// `void FPDF_ClosePage(page)`.
  static final FPDF_ClosePage = pdfiumLib.lookupFunction<
      Void Function(Pointer<Void>), void Function(Pointer<Void>)>(
    'FPDF_ClosePage',
  );

  /// `FPDF_TEXTPAGE FPDFText_LoadPage(page)`.
  static final FPDFText_LoadPage = pdfiumLib.lookupFunction<
      Pointer<Void> Function(Pointer<Void>),
      Pointer<Void> Function(Pointer<Void>)>(
    'FPDFText_LoadPage',
  );

  /// `void FPDFText_ClosePage(text_page)`.
  static final FPDFText_ClosePage = pdfiumLib.lookupFunction<
      Void Function(Pointer<Void>), void Function(Pointer<Void>)>(
    'FPDFText_ClosePage',
  );

  /// `int FPDFText_CountChars(text_page)`.
  static final FPDFText_CountChars = pdfiumLib.lookupFunction<
      Int32 Function(Pointer<Void>), int Function(Pointer<Void>)>(
    'FPDFText_CountChars',
  );

  /// `int FPDFText_GetText(text_page, start_index, count, result)`.
  ///
  /// `result` receives 16-bit code units, hence `Pointer<Uint16>`.
  static final FPDFText_GetText = pdfiumLib.lookupFunction<
      Int32 Function(Pointer<Void>, Int32, Int32, Pointer<Uint16>),
      int Function(Pointer<Void>, int, int, Pointer<Uint16>)>(
    'FPDFText_GetText',
  );

  /// `unsigned long FPDF_GetLastError()`.
  ///
  /// Bound with the ABI-specific [UnsignedLong] because C `unsigned long` is
  /// 32 bits on Windows but 64 bits on 64-bit Linux and macOS.
  static final FPDF_GetLastError =
      pdfiumLib.lookupFunction<UnsignedLong Function(), int Function()>(
    'FPDF_GetLastError',
  );
}
/// Owns one open PDFium document handle and exposes text extraction.
///
/// The PDFium library is initialized when the first document is opened and
/// destroyed when the last live document is disposed, so several documents
/// (for example one per tab) can coexist. Call [dispose] exactly when the
/// document is no longer needed; further calls are ignored.
class PdfDocument {
  /// Number of live documents sharing the initialized PDFium library.
  static int _liveDocuments = 0;

  final Pointer<Void> _docPtr;
  bool _disposed = false;

  PdfDocument._(this._docPtr);

  /// Opens the PDF at [filePath] with an empty password.
  ///
  /// Throws an [Exception] describing the PDFium error when the document
  /// cannot be loaded.
  factory PdfDocument.fromFile(String filePath) {
    _acquireLibrary();
    // Allocate with the same allocator that frees the strings below.
    final filePathPtr = filePath.toNativeUtf8(allocator: calloc);
    final passwordPtr = ''.toNativeUtf8(allocator: calloc);
    var loaded = false;
    try {
      final docPtr = PDFium.FPDF_LoadDocument(filePathPtr, passwordPtr);
      if (docPtr == nullptr) {
        final error = PDFium.FPDF_GetLastError();
        throw Exception('Failed to load PDF: ${_getErrorDescription(error)}');
      }
      loaded = true;
      return PdfDocument._(docPtr);
    } finally {
      calloc.free(filePathPtr);
      calloc.free(passwordPtr);
      if (!loaded) {
        // No document was created, so give back the library reference.
        _releaseLibrary();
      }
    }
  }

  /// Number of pages in the document.
  ///
  /// Throws [StateError] after [dispose].
  int get pageCount {
    _checkNotDisposed();
    return PDFium.FPDF_GetPageCount(_docPtr);
  }

  /// Returns the text of the page at zero-based [pageIndex].
  ///
  /// Returns an empty string when the page has no extractable characters.
  /// Throws an [Exception] when the page or its text cannot be loaded, and a
  /// [StateError] after [dispose].
  String getPageText(int pageIndex) {
    _checkNotDisposed();
    final pagePtr = PDFium.FPDF_LoadPage(_docPtr, pageIndex);
    if (pagePtr == nullptr) {
      throw Exception('Failed to load page $pageIndex');
    }

    try {
      final textPagePtr = PDFium.FPDFText_LoadPage(pagePtr);
      if (textPagePtr == nullptr) {
        throw Exception('Failed to load text for page $pageIndex');
      }

      try {
        final charCount = PDFium.FPDFText_CountChars(textPagePtr);
        // A negative count signals an error; treat it like an empty page.
        if (charCount <= 0) {
          return '';
        }

        // One extra UTF-16 unit for the terminator PDFium appends.
        final buffer = calloc<Uint16>(charCount + 1);
        try {
          final written =
              PDFium.FPDFText_GetText(textPagePtr, 0, charCount, buffer);
          if (written <= 0) {
            return '';
          }

          // The returned count includes the trailing NUL terminator, which
          // must not become part of the Dart string.
          var length = written;
          if (buffer[length - 1] == 0) {
            length--;
          }
          if (length == 0) {
            return '';
          }
          // The string is fully copied out before the buffer is freed.
          return _utf16PointerToString(buffer, length);
        } finally {
          calloc.free(buffer);
        }
      } finally {
        PDFium.FPDFText_ClosePage(textPagePtr);
      }
    } finally {
      PDFium.FPDF_ClosePage(pagePtr);
    }
  }

  /// Copies [length] UTF-16 code units starting at [pointer] into a string.
  static String _utf16PointerToString(Pointer<Uint16> pointer, int length) {
    final units = pointer.asTypedList(length);
    return String.fromCharCodes(units);
  }

  /// Closes the document and releases its share of the PDFium library.
  ///
  /// Safe to call more than once; only the first call has an effect.
  void dispose() {
    if (_disposed) {
      return;
    }
    _disposed = true;
    PDFium.FPDF_CloseDocument(_docPtr);
    _releaseLibrary();
  }

  /// Throws [StateError] if this document has already been disposed.
  void _checkNotDisposed() {
    if (_disposed) {
      throw StateError('PdfDocument has been disposed');
    }
  }

  /// Initializes PDFium for the first live document and counts the reference.
  static void _acquireLibrary() {
    if (_liveDocuments == 0) {
      PDFium.FPDF_InitLibrary();
    }
    _liveDocuments++;
  }

  /// Drops one reference and destroys PDFium when none remain.
  static void _releaseLibrary() {
    if (_liveDocuments == 0) {
      return;
    }
    _liveDocuments--;
    if (_liveDocuments == 0) {
      PDFium.FPDF_DestroyLibrary();
    }
  }

  /// Maps a `FPDF_GetLastError` code (`FPDF_ERR_*`) to a readable message.
  static String _getErrorDescription(int errorCode) {
    switch (errorCode) {
      case 2: // FPDF_ERR_FILE
        return 'File not found or could not be opened';
      case 3: // FPDF_ERR_FORMAT
        return 'File not in PDF format or corrupted';
      case 4: // FPDF_ERR_PASSWORD
        return 'Password required or incorrect password';
      case 5: // FPDF_ERR_SECURITY
        return 'Unsupported security scheme';
      case 6: // FPDF_ERR_PAGE
        return 'Page not found or content error';
      default: // Includes FPDF_ERR_UNKNOWN (1).
        return 'Unknown error';
    }
  }
}

61
win_text_editor/lib/modules/pdf_parse/widgets/pdf_parse_output.dart

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
// pdf_parse_output.dart
import 'package:flutter/material.dart';
import 'package:win_text_editor/modules/pdf_parse/controllers/pdf_parse_controller.dart';
import 'package:win_text_editor/shared/components/code_generation_components.dart';
/// Output panel of the PDF parsing tab.
///
/// Lets the user choose which extracted section to show and writes the
/// generated text into [codeController].
class PdfParseOutput extends StatefulWidget {
  const PdfParseOutput({
    super.key,
    required this.controller,
    required this.codeController,
  });

  /// Source of the extracted PDF content.
  final PdfParseController controller;

  /// Text controller that receives the generated content.
  final TextEditingController codeController;

  @override
  State<PdfParseOutput> createState() {
    return _PdfParseOutputState();
  }
}
/// State of [PdfParseOutput].
///
/// Remembers the selected section and mirrors the text generated by the
/// widget's controller into the widget's code text controller whenever the
/// controller notifies or the selection changes.
class _PdfParseOutputState extends State<PdfParseOutput> {
  /// Currently selected section name, or `null` when nothing is selected.
  String? _selectedOperation;

  @override
  void initState() {
    super.initState();
    widget.controller.addListener(_updateDisplay);
  }

  @override
  void didUpdateWidget(covariant PdfParseOutput oldWidget) {
    super.didUpdateWidget(oldWidget);
    if (!identical(oldWidget.controller, widget.controller)) {
      // Follow the new controller instead of staying subscribed to the old
      // one, which would otherwise keep driving this panel.
      oldWidget.controller.removeListener(_updateDisplay);
      widget.controller.addListener(_updateDisplay);
      // Refresh after the frame: changing a text controller while widgets
      // are being built is not allowed.
      WidgetsBinding.instance.addPostFrameCallback((_) {
        if (mounted) {
          _updateDisplay();
        }
      });
    }
  }

  @override
  void dispose() {
    widget.controller.removeListener(_updateDisplay);
    super.dispose();
  }

  /// Writes the content for the selected section into the text controller.
  ///
  /// The text is cleared when nothing is selected or the controller has no
  /// content for the selection.
  void _updateDisplay() {
    final operation = _selectedOperation;
    if (operation != null) {
      widget.codeController.text =
          widget.controller.genContentString([operation]) ?? '';
    } else {
      widget.codeController.text = '';
    }
  }

  /// Stores the new selection and refreshes the displayed content.
  void _selectOperation(String? operation) {
    setState(() {
      _selectedOperation = operation;
      _updateDisplay();
    });
  }

  @override
  Widget build(BuildContext context) {
    final operations = ['表格内', '表格外'];

    return CodeGenerationSection(
      title: '生成内容',
      codeController: widget.codeController,
      onNodeDropped: (node) => widget.controller.onDropOutlineNode(node),
      child: OperationRadioSection(
        operations: operations,
        selectedOperation: _selectedOperation,
        onOperationSelected: _selectOperation,
      ),
    );
  }
}

83
win_text_editor/lib/modules/pdf_parse/widgets/pdf_parse_view.dart

@ -0,0 +1,83 @@ @@ -0,0 +1,83 @@
import 'package:flutter/material.dart';
import 'package:provider/provider.dart';
import 'package:win_text_editor/framework/controllers/tab_items_controller.dart';
import 'package:win_text_editor/modules/pdf_parse/controllers/pdf_parse_controller.dart';
import 'pdf_parse_output.dart';
/// Root widget of a PDF parsing tab.
class PdfParseView extends StatefulWidget {
  const PdfParseView({super.key, required this.tabId});

  /// Identifier of the tab that hosts this view; used to look up or register
  /// the tab's controller.
  final String tabId;

  @override
  State<PdfParseView> createState() {
    return _PdfParseViewState();
  }
}
/// State of [PdfParseView].
///
/// Resolves the tab's [PdfParseController] (reusing the one registered with
/// the tab manager, or creating and registering a new one) and owns the text
/// controllers used by the view.
class _PdfParseViewState extends State<PdfParseView> {
  late final PdfParseController _controller;

  /// Shows the selected file path; kept in sync with [_controller].
  late final TextEditingController _filePathController;

  /// Receives the generated content shown in the output panel.
  final TextEditingController _contentController = TextEditingController();

  /// Whether [_controller] was obtained from the tab manager, in which case
  /// this state does not dispose it.
  bool _isControllerFromTabManager = false;

  // Left untyped (dynamic) as before, so the result of `getController` is
  // assigned to `_controller` without an explicit cast.
  get tabManager => Provider.of<TabItemsController>(context, listen: false);

  @override
  void initState() {
    super.initState();

    final controllerFromManager = tabManager.getController(widget.tabId);
    if (controllerFromManager != null) {
      _controller = controllerFromManager;
      _isControllerFromTabManager = true;
    } else {
      _controller = PdfParseController();
      _isControllerFromTabManager = false;
      tabManager.registerController(widget.tabId, _controller);
    }

    // One long-lived text controller instead of a new one on every build.
    _filePathController = TextEditingController(text: _controller.filePath);
    _controller.addListener(_syncFilePath);
  }

  /// Copies the controller's file path into the read-only path field.
  void _syncFilePath() {
    final path = _controller.filePath;
    if (_filePathController.text != path) {
      _filePathController.text = path;
    }
  }

  @override
  void dispose() {
    // Detach first: the controller may be disposed right below.
    _controller.removeListener(_syncFilePath);
    if (!_isControllerFromTabManager) {
      _controller.dispose();
    }
    _filePathController.dispose();
    _contentController.dispose();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    return ChangeNotifierProvider.value(
      value: _controller,
      child: Consumer<PdfParseController>(
        builder: (context, controller, child) {
          return Padding(
            padding: const EdgeInsets.all(8.0),
            child: Column(
              crossAxisAlignment: CrossAxisAlignment.start,
              children: [
                // Read-only path field with a button that opens the picker.
                TextField(
                  decoration: InputDecoration(
                    labelText: 'PDF File',
                    hintText: 'Select an PDF file',
                    suffixIcon: IconButton(
                      icon: const Icon(Icons.folder_open),
                      onPressed: _controller.pickFile,
                    ),
                    border: const OutlineInputBorder(),
                    errorText: controller.errorMessage,
                  ),
                  controller: _filePathController,
                  readOnly: true,
                ),
                const SizedBox(height: 4),
                Expanded(
                  child: PdfParseOutput(
                    codeController: _contentController,
                    controller: controller,
                  ),
                ),
              ],
            ),
          );
        },
      ),
    );
  }
}

14
win_text_editor/pubspec.lock

@ -138,7 +138,7 @@ packages: @@ -138,7 +138,7 @@ packages:
source: hosted
version: "1.3.2"
ffi:
dependency: transitive
dependency: "direct main"
description:
name: ffi
sha256: "289279317b4b16eb2bb7e271abccd4bf84ec9bdcbe999e278a94b804f5630418"
@ -178,18 +178,18 @@ packages: @@ -178,18 +178,18 @@ packages:
dependency: transitive
description:
name: flutter_charset_detector_android
sha256: "443145e8fc8515b3b32aee375691e40dd59197a86a2ae153166bc88c8200d83b"
sha256: "617345b0f78ad56c2633ea6132e57c2e374f6970792afbe9743237f683eeae8e"
url: "https://pub.dev"
source: hosted
version: "3.0.0"
version: "3.1.1"
flutter_charset_detector_darwin:
dependency: transitive
description:
name: flutter_charset_detector_darwin
sha256: daac20390275efb92fbb14350fe11286c5e29c7b80d6b0867f52d760f0d69763
sha256: "65d91133ea8ef06a440941b1126702b4735a8bd487430b41760e706a0b6b26d4"
url: "https://pub.dev"
source: hosted
version: "1.1.0"
version: "1.2.0"
flutter_charset_detector_platform_interface:
dependency: transitive
description:
@ -210,10 +210,10 @@ packages: @@ -210,10 +210,10 @@ packages:
dependency: "direct main"
description:
name: flutter_js
sha256: "0d22d73a474b5b80c3ab5508e7c3eab6fb20beea9dec45bbd21088cfd27a5e61"
sha256: "5bf5db354fe78fe24cb90a5fa6b4423d38712440c88e3445c3dc88bc134c452f"
url: "https://pub.dev"
source: hosted
version: "0.8.3"
version: "0.8.0"
flutter_lints:
dependency: "direct dev"
description:

3
win_text_editor/pubspec.yaml

@ -19,7 +19,6 @@ dependencies: @@ -19,7 +19,6 @@ dependencies:
collection: ^1.17.0
path: ^1.8.0
syncfusion_flutter_datagrid: ^23.1.40
flutter_js: ^0.8.3
xml: ^6.5.0
csv: ^6.0.0
mustache_template: ^2.0.0
@ -33,6 +32,8 @@ dependencies: @@ -33,6 +32,8 @@ dependencies:
quiver: ^3.2.2
flutter_charset_detector: ^5.0.0
docx_to_text: ^1.0.1
flutter_js: 0.8.0
ffi: ^2.0.1
dev_dependencies:
flutter_test:

4
win_text_editor/备忘.txt

@ -8,4 +8,6 @@ $env:FLUTTER_STORAGE_BASE_URL="https://storage.flutter-io.cn" @@ -8,4 +8,6 @@ $env:FLUTTER_STORAGE_BASE_URL="https://storage.flutter-io.cn"
其他备用镜像
$env:PUB_HOSTED_URL="https://mirrors.tuna.tsinghua.edu.cn/dart-pub"
$env:PUB_HOSTED_URL="https://pub.dartlang.org"
境外镜像
$env:PUB_HOSTED_URL="https://pub.dartlang.org"
$env:FLUTTER_STORAGE_BASE_URL=https://storage.googleapis.com
Loading…
Cancel
Save