@dovid
אשמח אם תפשיט לי את המילים "פרמטרי האחזור". מה בדיוק החסרון בFTS המהירות? או אולי כוונתך לאיך התוצאות יוצגו?
למעשה ישבתי על SQLITE קצת לא יודע למה כל כך הסתבכתי לפני זה עם התכנות הבסיסי אכן זה די פשוט. (הוספתי פה גם רכיבים כדי להקל על מי שרוצה לראות את התוצאה הסופית).
התוצאות שקיבלתי - אינדוקס די מהיר - אבל חיפוש קצת איטי אולי שיטת החיפוש שלי לא טובה? (השתמשתי ב-סריקה מלאה כדי לאפשר חיפוש לא מדויק)
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SQLite;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
using static System.Windows.Forms.VisualStyles.VisualStyleElement;
namespace WindowsFormsApp1
{
public partial class Form1 : Form
{
private System.Windows.Forms.Button buttonProcessFile;
private System.Windows.Forms.Button buttonViewTable;
private System.Windows.Forms.TextBox textBoxResult;
private System.Windows.Forms.ProgressBar progressBar;
private System.Windows.Forms.ProgressBar progressBar2;
private System.Windows.Forms.Button buttonSearch;
private DataGridView dataGridViewSearchResults;
private System.Windows.Forms.TextBox textBoxSearch;
public Form1()
{
InitializeComponent();
InitializeDynamicControls();
}
private void InitializeDynamicControls()
{
// Create a Button for processing
buttonProcessFile = new System.Windows.Forms.Button();
buttonProcessFile.Text = "Process Text File";
buttonProcessFile.Location = new System.Drawing.Point(12, 12);
buttonProcessFile.Click += buttonProcessFile_Click;
this.Controls.Add(buttonProcessFile);
// Create a Button for processing
buttonViewTable = new System.Windows.Forms.Button();
buttonViewTable.Text = "View";
buttonViewTable.Location = new System.Drawing.Point(100, 12);
buttonViewTable.Click += buttonViewTable_Click;
this.Controls.Add(buttonViewTable);
//create progressbar
//progressBar = new System.Windows.Forms.ProgressBar();
//progressBar.Dock = DockStyle.Bottom;
//this.Controls.Add(progressBar);
//create progressbar
progressBar2 = new System.Windows.Forms.ProgressBar();
progressBar2.Dock = DockStyle.Bottom;
this.Controls.Add(progressBar2);
// Create a TextBox for displaying results
textBoxResult = new System.Windows.Forms.TextBox();
textBoxResult.Multiline = true;
textBoxResult.ScrollBars = ScrollBars.Vertical;
textBoxResult.Size = new System.Drawing.Size(400, 200);
textBoxResult.Location = new System.Drawing.Point(12, 50);
this.Controls.Add(textBoxResult);
// Create a TextBox for entering search terms
textBoxSearch = new System.Windows.Forms.TextBox();
textBoxSearch.Location = new System.Drawing.Point(200, 12);
this.Controls.Add(textBoxSearch);
// Create a Button for initiating the search
buttonSearch = new System.Windows.Forms.Button();
buttonSearch.Text = "Search";
buttonSearch.Location = new System.Drawing.Point(300, 12);
buttonSearch.Click += buttonSearch_Click;
this.Controls.Add(buttonSearch);
// Create a DataGridView for displaying search results
dataGridViewSearchResults = new DataGridView();
dataGridViewSearchResults.Dock = DockStyle.Bottom;
this.Controls.Add(dataGridViewSearchResults);
}
private void buttonSearch_Click(object sender, EventArgs e)
{
string searchTerm = textBoxSearch.Text.Trim();
if (string.IsNullOrWhiteSpace(searchTerm))
{
MessageBox.Show("Please enter a search term.");
return;
}
string databaseFilePath = GetDatabaseFilePath();
using (SQLiteConnection connection = new SQLiteConnection($"Data Source={databaseFilePath};Version=3;"))
{
connection.Open();
using (SQLiteCommand command = new SQLiteCommand("SELECT * FROM WordData WHERE Word LIKE @searchTerm;", connection))
{
command.Parameters.Add(new SQLiteParameter("@searchTerm", "%" + searchTerm + "%"));
using (SQLiteDataAdapter adapter = new SQLiteDataAdapter(command))
{
DataTable dataTable = new DataTable();
adapter.Fill(dataTable);
// Display the search results in the DataGridView
dataGridViewSearchResults.DataSource = dataTable;
}
}
}
}
static string GetDatabaseFilePath()
{
// Get the path to the executable directory
string programFolder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);
return Path.Combine(programFolder, "your-database-file.sqlite");
}
private void buttonProcessFile_Click(object sender, EventArgs e)
{
// Create or open the SQLite database
string databaseFilePath = GetDatabaseFilePath();
using (SQLiteConnection connection = new SQLiteConnection($"Data Source={databaseFilePath};Version=3;"))
{
connection.Open();
// Create a table to store word data
CreateTable(connection);
// Let the user choose a folder to scan for text files
string folderPath = GetFolderPath();
if (folderPath != null)
{
ProcessFolder(connection, folderPath);
textBoxResult.AppendText("Data saved to SQLite table." + Environment.NewLine);
}
else
{
textBoxResult.AppendText("No folder selected. Exiting..." + Environment.NewLine);
}
}
}
private string GetFolderPath()
{
using (FolderBrowserDialog folderDialog = new FolderBrowserDialog())
{
folderDialog.Description = "Select a folder to scan for text files.";
DialogResult result = folderDialog.ShowDialog();
if (result == DialogResult.OK)
{
return folderDialog.SelectedPath;
}
else
{
return null; // User canceled the folder selection
}
}
}
private void ProcessFolder(SQLiteConnection connection, string folderPath)
{
string[] textFiles = Directory.GetFiles(folderPath, "*.txt", SearchOption.AllDirectories);
progressBar2.Maximum = textFiles.Length;
progressBar2.Value = 0;
int fileCount = 0;
foreach (string textFilePath in textFiles)
{
ProcessTextFile(connection, textFilePath);
fileCount++;
progressBar2.Value++;
}
textBoxResult.AppendText($"Processed {fileCount} text files and saved data to SQLite table." + Environment.NewLine);
}
private void ProcessTextFile(SQLiteConnection connection, string textFilePath)
{
// Read the text file using Windows-1255 encoding
string[] lines = File.ReadAllLines(textFilePath, Encoding.GetEncoding(1255));
//progressBar.Value = 0;
//progressBar.Maximum = lines.Length;
int lineNumber = 1;
List<WordData> wordDataList = new List<WordData>();
// Regular expression to match Hebrew characters
Regex hebrewRegex = new Regex(@"\p{IsHebrew}+(?<=\p{IsHebrew})\""(?=\p{IsHebrew})\p{IsHebrew}+|\p{IsHebrew}{2,}");
foreach (string line in lines)
{
// Use the regular expression to match and extract Hebrew words
MatchCollection matches = hebrewRegex.Matches(line);
foreach (Match match in matches)
{
string word = NormalizeHebrewText(match.Value);
// Add word data to the list
wordDataList.Add(new WordData
{
Word = word,
LineNumber = lineNumber,
FileName = textFilePath
});
}
lineNumber++;
//if (progressBar.Value < progressBar.Maximum)
//{ progressBar.Value++; }
}
//// Sort the words using Hebrew culture
//wordDataList.Sort((a, b) => string.Compare(a.Word, b.Word, new CultureInfo("he-IL"), CompareOptions.None));
// Insert all word data into the SQLite table at once
InsertWordDataBatch(connection, wordDataList);
textBoxResult.AppendText($"{textFilePath} Processed {lines.Length} lines and saved data to SQLite table." + Environment.NewLine);
}
private string NormalizeHebrewText(string text)
{
// Normalize Hebrew text (e.g., remove diacritics)
// You may need to implement this normalization based on your specific requirements.
// Example: Normalize to remove diacritics (NFD normalization)
text = new string(text.Normalize(NormalizationForm.FormD).Where(c => char.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark).ToArray()); // Normalize Hebrew text.
return text; // Return the normalized text.
}
static void CreateTable(SQLiteConnection connection)
{
using (SQLiteCommand command = new SQLiteCommand(
"CREATE TABLE IF NOT EXISTS WordData (Word TEXT, LineNumber INT, FileName TEXT);", connection))
{
command.ExecuteNonQuery();
}
}
static void InsertWordDataBatch(SQLiteConnection connection, List<WordData> wordDataList)
{
using (SQLiteCommand command = new SQLiteCommand(connection))
{
// Start a transaction for batch insert
using (var transaction = connection.BeginTransaction())
{
command.CommandText = "INSERT INTO WordData (Word, LineNumber, FileName) VALUES (@word, @lineNumber, @fileName);";
command.Parameters.Add(new SQLiteParameter("@word", DbType.String));
command.Parameters.Add(new SQLiteParameter("@lineNumber", DbType.Int32));
command.Parameters.Add(new SQLiteParameter("@fileName", DbType.String));
foreach (var wordData in wordDataList)
{
command.Parameters["@word"].Value = wordData.Word;
command.Parameters["@lineNumber"].Value = wordData.LineNumber;
command.Parameters["@fileName"].Value = wordData.FileName;
// Add the command to the transaction
command.ExecuteNonQuery();
}
// Commit the transaction to perform the batch insert
transaction.Commit();
}
}
}
private void buttonViewTable_Click(object sender, EventArgs e)
{
string databaseFilePath = GetDatabaseFilePath();
using (SQLiteConnection connection = new SQLiteConnection($"Data Source={databaseFilePath};Version=3;"))
{
connection.Open();
using (SQLiteCommand command = new SQLiteCommand("SELECT * FROM WordData;", connection))
{
using (SQLiteDataAdapter adapter = new SQLiteDataAdapter(command))
{
DataTable dataTable = new DataTable();
adapter.Fill(dataTable);
// Create a new form to display the table
Form tableForm = new Form();
tableForm.Text = "WordData Table Contents";
// Create a DataGridView control and set its properties
DataGridView dataGridView = new DataGridView();
dataGridView.Dock = DockStyle.Fill;
dataGridView.DataSource = dataTable;
// Add the DataGridView to the form
tableForm.Controls.Add(dataGridView);
// Show the form as a dialog
tableForm.ShowDialog();
}
}
}
}
public class WordData
{
public string Word { get; set; }
public int LineNumber { get; set; }
public string FileName { get; set; }
}
}
}
ועוד שאלה:
@dovid כתב בביצוע פרקטי לאינדוקס מאגר טקסט עברי:
לגבי הפרוייקט שלכם תוכלו לבנות רשימת מילים, ולהכניס לטבלה את הפרטים הבאים:
המילה, מס' ספר, מס' שורה/פסקה, מיקום
בטבלה אחרת להכניס את הנתונים עצמם לשליפה מהירה של פסקאות של ספרים, ככה:
מס' פסקה (מספר רץ), פסקה, מס' ספר
לא הצלחתי למצוא את המכנה המשותף בין שני הטבלאות שמאפשר לי להצליב בין המידע שאקבל מטבלה א' לצורך טבלה ב'.
אולי עם דוגמא קצרה אוכל לרדת לסוף דעתך.