Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions .github/workflows/rust.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# CI workflow for the crate under `parser/`: lint with clippy, build the
# extension with maturin, and run the test suite.
name: Rust

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # System packages: C toolchain + clang/LLVM, Leptonica/Tesseract development
      # headers, Tesseract language data (rus, eng), and Python for maturin.
      # `apt-get` is used rather than `apt` because `apt` does not promise a
      # stable command-line interface for scripted use.
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            pkg-config \
            clang \
            llvm-dev \
            libclang-dev \
            libleptonica-dev \
            libtesseract-dev \
            tesseract-ocr \
            tesseract-ocr-rus \
            tesseract-ocr-eng \
            python3 \
            python3-pip

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          # Request clippy explicitly so the "Run clippy" step does not depend on
          # whatever components happen to be preinstalled on the runner image.
          components: clippy

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: parser

      - name: Install maturin
        working-directory: parser
        run: pip3 install maturin

      # Any clippy warning fails the job (`-D warnings`).
      - name: Run clippy
        working-directory: parser
        run: cargo clippy --release -- -D warnings

      - name: Build extension
        working-directory: parser
        run: maturin build --release

      - name: Run tests
        working-directory: parser
        run: cargo test --release
35 changes: 0 additions & 35 deletions parser/assets/tests_results/extract_from_image.txt

This file was deleted.

Empty file.
1 change: 1 addition & 0 deletions parser/assets/tests_results/extract_from_image_ru.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
МЯУ=191919
2 changes: 1 addition & 1 deletion parser/assets/tests_results/extract_media.txt

Large diffs are not rendered by default.

37 changes: 1 addition & 36 deletions parser/assets/tests_results/extract_text_from_docx_with_png.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,4 @@ A11 (в ней же таблица)A11_11 A11_12 A11_21 A11_22 a12 a13 a21 a23



KP
Имеются следующие данные о величине товарооборота для 50 магазинов города (A, —
товарооборот, усл. руб.; п — число магазинов)
[A, [ о, 50) ][50, 100) [ [100, 150) (150, 200) ] [200, 250) [ [250, 300)
lsl e o [ ¢ [ ¢ 1 5 |
ПОСТРОИТЬ гистограмму и полигон частот
Гистограмма:
Полигон:
Записать эмпирическую функцию распределения и построить её
трафик
. 5 ‘Ё 25 o #< 25
s B<eST |03, 25 <а 75
_ 2, 15<2 < 125 0.54, 75 < т < 125
Е, = { №8, 125 < ш < 115 = { 0.72, 125 < @ < 175
B 5<< 905 |086, 175 < @ < 225
T 295 < а < 275 294› 225<?5227755
1, ® > 275 i ы
Определить числовые характеристики вариационного ряда:
® 5°,5,6°,6
Выборочное среднее:
1
т = р° (25-15 + 7512 + 125 :9 + 1757 + 225 +4 + 275-3) = 107
ы
Выборочная дисперсия (смещенная оценка дисперсии):
1
& = ;° (267154752 124 125% - 9 + 17527+ 2257 -4 + 2752 - 3) —
E
—107 = 17118
Выборочная дисперсия (несмещенная оценка дисперсии):
50
а° = — - 17118 = 17467.35
° 7%
Выборочное среднее квадратичное отклонение (смещенное)
5 = 130.84
Выборочное среднее квадратичное отклонение (несмещенное)
в = 132.16
МЯУ=191919
2 changes: 1 addition & 1 deletion parser/assets/tests_results/extract_xml_info.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
media/image.png : rId663977569;
media/image1.png : rId2;
Binary file removed parser/assets/text_from_img.png
Binary file not shown.
Binary file added parser/assets/text_from_img_en.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added parser/assets/text_from_img_ru.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified parser/assets/text_tables_png.docx
Binary file not shown.
21 changes: 6 additions & 15 deletions parser/src/parsers/docx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,12 +298,9 @@ mod tests {

assert_eq!(
res,
match String::from_utf8(read_data_from_file(
String::from_utf8(read_data_from_file(
"assets/tests_results/extract_xml_info.txt"
)?) {
Ok(str) => str,
Err(err) => panic!("{err}"),
}
)?)?
);

Ok(())
Expand All @@ -322,12 +319,9 @@ mod tests {

assert_eq!(
res,
match String::from_utf8(read_data_from_file(
String::from_utf8(read_data_from_file(
"assets/tests_results/extract_media.txt"
)?) {
Ok(str) => str,
Err(err) => panic!("{err}"),
}
)?)?
);

Ok(())
Expand All @@ -339,11 +333,8 @@ mod tests {
let res = pars.get_from_docx(&data)?;

assert_eq!(
res,
match String::from_utf8(read_data_from_file(check_file)?) {
Ok(str) => str,
Err(err) => panic!("{err}"),
}
res.trim(),
String::from_utf8(read_data_from_file(check_file)?)?.trim()
);
Ok(())
}
Expand Down
24 changes: 20 additions & 4 deletions parser/src/parsers/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,31 @@ mod tests {
}

#[test]
fn extract_from_image() -> Result<()> {
let data = read_data_from_file("assets/text_from_img.png")?;
fn extract_from_image_en() -> Result<()> {
let data = read_data_from_file("assets/text_from_img_en.png")?;
let res = get_from_image(&data)?;

assert_eq!(
res,
res.trim(),
String::from_utf8(read_data_from_file(
"assets/tests_results/extract_from_image.txt"
"assets/tests_results/extract_from_image_en.txt"
)?)?
.trim()
);
Ok(())
}

/// Runs `get_from_image` on the `_ru` sample image and compares the result
/// with the stored expected text.
///
/// Both sides are trimmed before comparison, so leading and trailing
/// whitespace differences do not fail the test.
#[test]
fn extract_from_image_ru() -> Result<()> {
    let image_bytes = read_data_from_file("assets/text_from_img_ru.png")?;
    let extracted = get_from_image(&image_bytes)?;

    let expected_bytes = read_data_from_file("assets/tests_results/extract_from_image_ru.txt")?;
    let expected = String::from_utf8(expected_bytes)?;

    assert_eq!(extracted.trim(), expected.trim());
    Ok(())
}
Expand Down