与迷信AI编程的人打个擂台

g
gooog
楼主 (北美华人网)
所有迷信AI编程的,如果你说的是真的,那我说个非常容易理解的编程题。
你把AI编程给你的答案公布在这个版面上。让大家看看,AI编程到底牛还是不牛。
1)同学A写了一篇文本A,同学B写了一篇文本B。 请用C编程,找出文本B中可能剽窃文本A的词块。
2)同学A写了一篇word文档A,同学B写了一篇word文档B。 请用VBA编程,找出文档B中可能剽窃文档A的词块。 并且在B中标识出来。
人在它乡
1:
我将提供一个C程序,用于检测文本B中可能剽窃文本A的词块。我们将使用滑动窗口技术,比较两个文本中的词序列,并寻找匹配的词块(例如,连续3个或更多相同的单词)。以下是实现:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Capacity limits and detection threshold used by the whole program.
// Each directive sits on its own line: a #define runs to the end of the
// line, so keeping several on one line makes the later ones part of the
// first macro's replacement text.
#define MAX_WORDS 1000   // maximum number of words stored per text
#define MAX_WORD_LEN 50  // bytes reserved per word, terminator included
#define MIN_MATCH_LEN 3  // minimum number of consecutive equal words that is reported
// Split `text` in place into lowercase words.
//
// text  : NUL-terminated buffer; strtok() overwrites its delimiters, so the
//         buffer is modified.
// words : output table with room for at least MAX_WORDS rows.
// Returns the number of rows filled (never more than MAX_WORDS).
//
// A token longer than MAX_WORD_LEN - 1 bytes is truncated so it cannot
// overrun its row.
int tokenize(char *text, char words[][MAX_WORD_LEN]) {
    static const char delimiters[] = " \n\t,.!?;\"";
    int word_count = 0;

    for (char *token = strtok(text, delimiters);
         token != NULL && word_count < MAX_WORDS;
         token = strtok(NULL, delimiters)) {
        size_t i = 0;
        // Lowercase while copying so later comparisons ignore case.
        for (; token[i] != '\0' && i < (size_t)(MAX_WORD_LEN - 1); i++) {
            // <ctype.h> functions need a value representable as unsigned char.
            words[word_count][i] = (char)tolower((unsigned char)token[i]);
        }
        words[word_count][i] = '\0';
        word_count++;
    }
    return word_count;
}
// Print every maximal run of at least MIN_MATCH_LEN consecutive words that
// appears in both word tables.
//
// words_a, len_a : words of text A and how many of them are valid.
// words_b, len_b : words of text B and how many of them are valid.
//
// Output goes to stdout; a single notice is printed when nothing is found.
// Only maximal runs are reported: a run whose preceding word pair also
// matches is the tail of a longer run and is skipped, so one copied passage
// yields one report instead of one per suffix.
void find_plagiarism(char words_a[][MAX_WORD_LEN], int len_a,
                     char words_b[][MAX_WORD_LEN], int len_b) {
    int found = 0;

    for (int i = 0; i <= len_a - MIN_MATCH_LEN; i++) {
        for (int j = 0; j <= len_b - MIN_MATCH_LEN; j++) {
            // Tail of a run that starts earlier: already covered.
            if (i > 0 && j > 0 &&
                strcmp(words_a[i - 1], words_b[j - 1]) == 0) {
                continue;
            }

            // Length of the common run starting at (i, j).
            int match_len = 0;
            while (i + match_len < len_a && j + match_len < len_b &&
                   strcmp(words_a[i + match_len], words_b[j + match_len]) == 0) {
                match_len++;
            }
            if (match_len < MIN_MATCH_LEN) {
                continue;
            }

            found = 1;
            printf("可能的剽窃片段 (长度 %d):\n", match_len);
            printf("文本A: ");
            for (int k = 0; k < match_len; k++) {
                printf("%s ", words_a[i + k]);
            }
            printf("\n文本B: ");
            for (int k = 0; k < match_len; k++) {
                printf("%s ", words_b[j + k]);
            }
            printf("\n\n");
        }
    }

    if (!found) {
        printf("未找到可能的剽窃词块。\n");
    }
}
// Program entry point.
//
// Reads text A and then text B from standard input (one line each, at most
// 9999 bytes), splits both into lowercase words with tokenize() and prints
// the word blocks they share with find_plagiarism().
// Returns 0 on success and 1 when either line cannot be read.
int main() {
    char text_a[10000], text_b[10000];
    char words_a[MAX_WORDS][MAX_WORD_LEN], words_b[MAX_WORDS][MAX_WORD_LEN];

    // Read text A. A failed read would leave the buffer uninitialised, so
    // stop instead of tokenizing garbage.
    printf("请输入文本A:\n");
    if (fgets(text_a, sizeof(text_a), stdin) == NULL) {
        fprintf(stderr, "读取文本A失败。\n");
        return 1;
    }

    // Read text B.
    printf("请输入文本B:\n");
    if (fgets(text_b, sizeof(text_b), stdin) == NULL) {
        fprintf(stderr, "读取文本B失败。\n");
        return 1;
    }

    // Tokenize both texts.
    int len_a = tokenize(text_a, words_a);
    int len_b = tokenize(text_b, words_b);

    // Look for shared word blocks.
    find_plagiarism(words_a, len_a, words_b, len_b);

    return 0;
}
/*
程序说明 输入处理: 程序通过 fgets 读取文本A和文本B。 使用 strtok 将文本按空格、标点等分割为单词,并存储在二维字符数组中。 单词在比较前转换为小写,以忽略大小写差异。 剽窃检测: 使用滑动窗口技术,比较文本A和文本B中的所有词序列。 当发现连续 MIN_MATCH_LEN(默认设为3)或更多个单词完全匹配时,认为可能是剽窃。 输出匹配的词块及其在两个文本中的内容。 输出: 如果找到匹配的词块,打印匹配的长度和具体内容。 如果未找到匹配,提示未发现剽窃。 使用方法 编译并运行程序(例如,使用 gcc)。 输入文本A(同学A的文本)。 输入文本B(同学B的文本)。 程序会输出所有可能的剽窃词块(连续3个或更多相同单词)。 示例运行 输入: 请输入文本A: The quick brown fox jumps over the lazy dog 请输入文本B: My quick brown fox jumps over the idle cat 输出: 可能的剽窃片段 (长度 5): 文本A: quick brown fox jumps over  文本B: quick brown fox jumps over  注意事项 限制:程序假设文本长度不超过10000字符,每个单词不超过50字符,最多1000个单词。必要时可调整这些常量。 改进空间: 可添加更复杂的匹配算法(如忽略词序、考虑同义词)。 可支持从文件读取文本。 可调整 MIN_MATCH_LEN 来改变检测敏感度。 输入要求:文本应为英文,标点会被忽略。若需支持其他语言,需修改分词逻辑。 如果需要进一步优化或有具体需求(如支持中文、从文件读取),请告诉我!
*/
m
mozartyell
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h>
// Buffer sizes and detection threshold. One directive per line: a #define
// extends to the end of its line, so collapsing them defines only the first.
#define MAX_TEXT_SIZE 10000  // bytes reserved for each input text, terminator included
#define MAX_WORD_LEN 100     // bytes reserved per word, terminator included
#define MIN_CHUNK_SIZE 3     // minimum number of consecutive equal words that is reported
// One matching word block reported by find_plagiarism().
typedef struct {
    char chunk[MAX_WORD_LEN * MIN_CHUNK_SIZE]; // matched words joined by single spaces
    int pos_a;   // zero-based index of the first matched word in text A
    int pos_b;   // zero-based index of the first matched word in text B
    int length;  // number of matched words
} Match;
// Lowercase `text` in place and turn punctuation into spaces.
//
// Letters, digits and whitespace are kept (letters lowercased). Any other
// byte becomes a single space unless the output is still empty or already
// ends in a space. The result is never longer than the input.
void normalize_text(char *text) {
    size_t out = 0;

    for (size_t in = 0; text[in] != '\0'; in++) {
        // <ctype.h> functions need a value representable as unsigned char.
        unsigned char c = (unsigned char)text[in];

        if (isalnum(c) || isspace(c)) {
            text[out++] = (char)tolower(c);
        } else if (out > 0 && text[out - 1] != ' ') {
            // Look at the OUTPUT's last byte (never at text[-1]) so a
            // leading punctuation mark is simply dropped.
            text[out++] = ' ';
        }
    }
    text[out] = '\0';
}
// Split `text` in place on blanks, tabs and line breaks.
//
// text  : NUL-terminated buffer; strtok() overwrites its separators.
// words : output table with room for at least MAX_TEXT_SIZE / 10 rows.
// Returns the number of words stored (at most MAX_TEXT_SIZE / 10).
//
// A token longer than MAX_WORD_LEN - 1 bytes is truncated so it cannot
// overflow its row.
int split_words(char *text, char words[][MAX_WORD_LEN]) {
    static const char separators[] = " \t\n\r";
    const int capacity = MAX_TEXT_SIZE / 10;
    int count = 0;

    for (char *token = strtok(text, separators);
         token != NULL && count < capacity;
         token = strtok(NULL, separators)) {
        // strtok() never returns an empty token, so every token is a word.
        snprintf(words[count], MAX_WORD_LEN, "%s", token);
        count++;
    }
    return count;
}
// Count how many consecutive words are equal when text A is read from
// pos_a and text B from pos_b.
//
// words_a, count_a : words of text A and how many of them are valid.
// words_b, count_b : words of text B and how many of them are valid.
// Returns the length of the common run; 0 when the first pair differs or a
// start position lies outside its table.
int check_match_length(char words_a[][MAX_WORD_LEN], int count_a, int pos_a,
                       char words_b[][MAX_WORD_LEN], int count_b, int pos_b) {
    if (pos_a < 0 || pos_b < 0) {
        return 0;
    }

    int length = 0;
    for (int ia = pos_a, ib = pos_b; ia < count_a && ib < count_b; ia++, ib++) {
        if (strcmp(words_a[ia], words_b[ib]) != 0) {
            break;
        }
        length++;
    }
    return length;
}
// Find word blocks of text B that also occur in text A.
//
// words_a, count_a : words of text A and how many of them are valid.
// words_b, count_b : words of text B and how many of them are valid.
// matches          : output array; the caller in this file passes 1000
//                    elements, and at most that many are written.
// Returns the number of matches stored.
//
// For each position in B the LONGEST run shared with any position in A is
// taken (the first A position wins ties); runs shorter than MIN_CHUNK_SIZE
// are ignored. After a match the scan resumes behind it, so reported blocks
// never overlap in B. `chunk` holds a preview of at most ten words and is
// truncated if it would not fit.
int find_plagiarism(char words_a[][MAX_WORD_LEN], int count_a,
                    char words_b[][MAX_WORD_LEN], int count_b,
                    Match matches[]) {
    enum { MATCH_CAPACITY = 1000, PREVIEW_WORDS = 10 };
    int match_count = 0;

    for (int i = 0; i < count_b && match_count < MATCH_CAPACITY; i++) {
        int best_len = 0;
        int best_pos = -1;

        // Search text A for the longest run that starts with words_b[i].
        for (int j = 0; j < count_a; j++) {
            if (strcmp(words_b[i], words_a[j]) != 0) {
                continue;
            }
            int length = check_match_length(words_a, count_a, j, words_b, count_b, i);
            if (length > best_len) {
                best_len = length;
                best_pos = j;
            }
        }
        if (best_len < MIN_CHUNK_SIZE) {
            continue;
        }

        Match *m = &matches[match_count++];
        m->pos_a = best_pos;
        m->pos_b = i;
        m->length = best_len;

        // Build the preview with bounded appends; stop on truncation.
        size_t used = 0;
        m->chunk[0] = '\0';
        for (int k = 0; k < best_len && k < PREVIEW_WORDS; k++) {
            size_t room = sizeof m->chunk - used;
            int n = snprintf(m->chunk + used, room, "%s%s",
                             k > 0 ? " " : "", words_b[i + k]);
            if (n < 0 || (size_t)n >= room) {
                break;
            }
            used += (size_t)n;
        }

        // Skip the words just matched (the loop adds the final +1).
        i += best_len - 1;
    }
    return match_count;
}
// Read at most MAX_TEXT_SIZE - 1 bytes of `filename` into `text` and
// NUL-terminate the result.
//
// filename : path of the file to read.
// text     : destination buffer of at least MAX_TEXT_SIZE bytes.
// Returns 1 on success, 0 if the file cannot be opened or read (a message
// is printed to stdout in both failure cases).
int read_file(const char *filename, char *text) {
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        printf("无法打开文件: %s\n", filename);
        return 0;
    }

    // Terminate right after the bytes actually read; terminating only at
    // the buffer's last byte would leave stale bytes behind a short file.
    size_t bytes_read = fread(text, sizeof(char), MAX_TEXT_SIZE - 1, file);
    int read_failed = ferror(file);
    text[bytes_read] = '\0';
    fclose(file);

    if (read_failed) {
        printf("读取文件失败: %s\n", filename);
        return 0;
    }
    return 1;
}
// Program entry point.
//
// Asks how the two texts are supplied (1 = from files, anything else =
// typed in), normalizes and splits them, searches text B for word blocks
// copied from text A and prints the matches plus a similarity percentage.
// Returns 0 on success, 1 when a file name or file cannot be read.
int main() {
    // static: roughly half a megabyte of buffers is kept out of the stack
    // frame; static storage also starts zero-initialised.
    static char text_a[MAX_TEXT_SIZE], text_b[MAX_TEXT_SIZE];
    static char words_a[MAX_TEXT_SIZE / 10][MAX_WORD_LEN];
    static char words_b[MAX_TEXT_SIZE / 10][MAX_WORD_LEN];
    static Match matches[1000];

    printf("=== 文本剽窃检测程序 ===\n\n");

    // Input mode selection.
    printf("请选择输入方式:\n");
    printf("1. 从文件读取\n");
    printf("2. 手动输入\n");
    printf("请选择 (1-2): ");

    // Unparsable input falls back to manual entry, the branch that already
    // handles every value other than 1.
    int choice = 2;
    if (scanf("%d", &choice) != 1) {
        choice = 2;
    }
    // Discard the rest of the line, not just one character.
    for (int ch = getchar(); ch != '\n' && ch != EOF; ch = getchar()) {
    }

    if (choice == 1) {
        char filename_a[100], filename_b[100];

        printf("请输入文本A的文件名: ");
        if (fgets(filename_a, sizeof(filename_a), stdin) == NULL) {
            return 1;
        }
        filename_a[strcspn(filename_a, "\n")] = 0; // strip the newline

        printf("请输入文本B的文件名: ");
        if (fgets(filename_b, sizeof(filename_b), stdin) == NULL) {
            return 1;
        }
        filename_b[strcspn(filename_b, "\n")] = 0;

        if (!read_file(filename_a, text_a) || !read_file(filename_b, text_b)) {
            return 1;
        }
    } else {
        // Manual entry: one line per text; end of input counts as empty text.
        printf("\n请输入文本A (按回车结束):\n");
        if (fgets(text_a, sizeof(text_a), stdin) == NULL) {
            text_a[0] = '\0';
        }

        printf("\n请输入文本B (按回车结束):\n");
        if (fgets(text_b, sizeof(text_b), stdin) == NULL) {
            text_b[0] = '\0';
        }
    }

    // Normalize both texts.
    normalize_text(text_a);
    normalize_text(text_b);

    // Split into words.
    int count_a = split_words(text_a, words_a);
    int count_b = split_words(text_b, words_b);

    printf("\n文本A包含 %d 个词\n", count_a);
    printf("文本B包含 %d 个词\n\n", count_b);

    // Search for copied blocks.
    int match_count = find_plagiarism(words_a, count_a, words_b, count_b, matches);

    // Report.
    if (match_count == 0) {
        printf("未发现明显的剽窃词块\n");
        return 0;
    }

    printf("发现 %d 个可能的剽窃词块:\n\n", match_count);

    int total_matched_words = 0;
    for (int i = 0; i < match_count; i++) {
        printf("词块 %d:\n", i + 1);
        printf(" 内容: \"%s\"\n", matches[i].chunk);
        printf(" 长度: %d 个词\n", matches[i].length);
        printf(" 在文本A中的位置: 第 %d 个词开始\n", matches[i].pos_a + 1);
        printf(" 在文本B中的位置: 第 %d 个词开始\n", matches[i].pos_b + 1);
        printf("\n");
        total_matched_words += matches[i].length;
    }

    // Share of B's words that lie inside a match. count_b is positive here
    // because at least one match was found.
    float similarity = (float)total_matched_words / count_b * 100;
    printf("相似度: %.2f%% (基于匹配词数)\n", similarity);

    if (similarity > 30) {
        printf("警告: 相似度较高,可能存在剽窃!\n");
    }

    return 0;
}
人在它乡
2, 纯属好奇,问了Grok
Below is a VBA (Visual Basic for Applications) program for Microsoft Word to detect potential plagiarism by identifying matching word blocks between two Word documents (Document A by Student A and Document B by Student B) and highlighting those blocks in Document B. The program assumes the documents are open in Microsoft Word and compares text to find sequences of words (e.g., 3 or more consecutive identical words) that may indicate plagiarism. The matching blocks will be highlighted in Document B. VBA Code Option Explicit
' Highlights in Document B every run of minMatchLen or more consecutive
' words that also occurs in Document A.
'
' Both documents must already be open under the names used below. Matching
' is case-insensitive and works on the words returned by TokenizeText.
' Each maximal run is reported once and every occurrence of its text in
' Document B is highlighted in yellow.
'
' Limitations: the run is searched as words joined by single spaces, so a
' passage that has punctuation between its words in Document B is detected
' but may not be found for highlighting. Find.Text accepts at most 255
' characters, so a longer run is highlighted up to the last whole word that
' fits.
Sub DetectPlagiarism()
    Const MAX_FIND_LEN As Long = 255

    Dim docA As Document
    Dim docB As Document
    Dim wordsA() As String
    Dim wordsB() As String
    Dim minMatchLen As Long
    Dim i As Long, j As Long
    Dim matchLen As Long
    Dim isTail As Boolean
    Dim found As Boolean
    Dim rngB As Range
    Dim matchText As String
    Dim findText As String
    Dim cutAt As Long

    ' Minimum number of consecutive equal words that counts as a match.
    minMatchLen = 3

    ' Look up the two open documents; a missing one leaves its variable Nothing.
    On Error Resume Next
    Set docA = Documents("DocumentA.docx") ' Replace with actual file name of Document A
    Set docB = Documents("DocumentB.docx") ' Replace with actual file name of Document B
    On Error GoTo 0

    If docA Is Nothing Or docB Is Nothing Then
        MsgBox "Please ensure both DocumentA.docx and DocumentB.docx are open.", vbCritical
        Exit Sub
    End If

    wordsA = TokenizeText(docA.Content.Text)
    wordsB = TokenizeText(docB.Content.Text)

    found = False
    For i = LBound(wordsA) To UBound(wordsA) - minMatchLen + 1
        For j = LBound(wordsB) To UBound(wordsB) - minMatchLen + 1
            ' A run whose preceding word pair also matches is the tail of a
            ' longer run that was already handled.
            isTail = False
            If i > LBound(wordsA) And j > LBound(wordsB) Then
                isTail = (LCase$(wordsA(i - 1)) = LCase$(wordsB(j - 1)))
            End If

            If Not isTail Then
                ' VBA's And evaluates both operands, so the bounds test and
                ' the element comparison are kept in separate statements.
                matchLen = 0
                Do While i + matchLen <= UBound(wordsA) And j + matchLen <= UBound(wordsB)
                    If LCase$(wordsA(i + matchLen)) <> LCase$(wordsB(j + matchLen)) Then Exit Do
                    matchLen = matchLen + 1
                Loop

                If matchLen >= minMatchLen Then
                    found = True
                    matchText = Join(ArraySlice(wordsB, j, j + matchLen - 1), " ")
                    Debug.Print "Possible plagiarism detected (length " & matchLen & "): " & matchText

                    ' Keep the search text within Find's limit, cutting at a
                    ' word boundary where possible.
                    findText = matchText
                    If Len(findText) > MAX_FIND_LEN Then
                        cutAt = InStrRev(findText, " ", MAX_FIND_LEN + 1)
                        If cutAt > 1 Then
                            findText = Left$(findText, cutAt - 1)
                        Else
                            findText = Left$(findText, MAX_FIND_LEN)
                        End If
                    End If

                    ' Highlight every occurrence in Document B.
                    Set rngB = docB.Content
                    With rngB.Find
                        .ClearFormatting
                        .Text = findText
                        .MatchWholeWord = True
                        .MatchCase = False
                        .MatchWildcards = False
                        .Forward = True
                        .Wrap = wdFindStop
                        Do While .Execute
                            rngB.HighlightColorIndex = wdYellow
                            rngB.Collapse wdCollapseEnd
                        Loop
                    End With
                End If
            End If
        Next j
    Next i

    If Not found Then
        MsgBox "No potential plagiarism blocks found.", vbInformation
    Else
        MsgBox "Potential plagiarism blocks have been highlighted in Document B.", vbInformation
    End If
End Sub
' Splits text into words.
'
' A word is a maximal run of ASCII letters, digits and straight apostrophes;
' every other character separates words. The original letter case is kept.
' Returns a zero-based String array with one element per word. When the text
' contains no word the array has a single empty element.
Function TokenizeText(text As String) As String()
    Dim words() As String
    Dim wordCount As Long
    Dim i As Long
    Dim ch As String
    Dim currentWord As String

    ' Start small and double on demand, so document length is not limited.
    ReDim words(0 To 255)
    wordCount = 0
    currentWord = ""

    ' One pass beyond the end (ch = "") flushes the final word.
    For i = 1 To Len(text) + 1
        If i <= Len(text) Then
            ch = Mid$(text, i, 1)
        Else
            ch = ""
        End If

        If ch <> "" And (IsLetterOrDigit(ch) Or ch = "'") Then
            currentWord = currentWord & ch
        ElseIf currentWord <> "" Then
            If wordCount > UBound(words) Then
                ReDim Preserve words(0 To 2 * UBound(words) + 1)
            End If
            words(wordCount) = currentWord
            wordCount = wordCount + 1
            currentWord = ""
        End If
    Next i

    ' Shrink the array to the number of words found.
    If wordCount > 0 Then
        ReDim Preserve words(0 To wordCount - 1)
    Else
        ReDim words(0 To 0)
    End If

    TokenizeText = words
End Function
' Returns True when char is exactly one character in the ranges A-Z, a-z
' or 0-9; any other string, including the empty string, gives False.
Function IsLetterOrDigit(char As String) As Boolean
    IsLetterOrDigit = (char Like "[A-Za-z0-9]")
End Function
' Returns the elements arr(startIdx) .. arr(endIdx) as a new zero-based
' String array. The caller must pass startIdx <= endIdx, both inside the
' bounds of arr.
Function ArraySlice(arr() As String, startIdx As Long, endIdx As Long) As String()
    Dim result() As String
    Dim offset As Long

    ReDim result(0 To endIdx - startIdx)
    For offset = 0 To endIdx - startIdx
        result(offset) = arr(startIdx + offset)
    Next offset

    ArraySlice = result
End Function
How the Code Works Document Setup: The program assumes two Word documents (DocumentA.docx and DocumentB.docx) are open. You must replace these names with the actual file names of the documents. It checks if both documents are open, displaying an error if they are not. Text Tokenization: The TokenizeText function splits the text of each document into words, removing punctuation (except apostrophes for contractions like “don’t”) and converting to an array of words. Words are compared case-insensitively. Plagiarism Detection: The program uses a sliding window approach to compare sequences of words from Document A and Document B. A match is considered potential plagiarism if it contains at least minMatchLen (default: 3) consecutive identical words. When a match is found, the program reconstructs the matched text and searches for it in Document B. Highlighting: Matching word blocks in Document B are highlighted in yellow using Word’s HighlightColorIndex. The matched text is also printed to the Immediate Window (View > Immediate Window in the VBA Editor) for debugging. Output: If matches are found, a message box informs the user that potential plagiarism blocks have been highlighted in Document B. If no matches are found, a message box indicates no plagiarism was detected. How to Use Prepare the Documents: Open both DocumentA.docx (Student A’s document) and DocumentB.docx (Student B’s document) in Microsoft Word. Ensure the file names in the VBA code (DocumentA.docx and DocumentB.docx) match the actual file names. Set Up VBA: Open Microsoft Word. Press Alt + F11 to open the VBA Editor. 
Insert > Module, and paste the above code into the new module. Save the macro-enabled document or ensure macros are enabled. Run the Macro: In Word, press Alt + F8, select DetectPlagiarism, and click Run. The macro will process both documents, highlight potential plagiarism in Document B, and display a message box with the results. Check Results: Open DocumentB.docx to see any highlighted (yellow) text indicating potential plagiarism. Check the Immediate Window (Ctrl + G in the VBA Editor) for details on matched blocks. Example Document A: The quick brown fox jumps over the lazy dog Document B: My quick brown fox jumps over the idle cat Result: In DocumentB.docx, the phrase “quick brown fox jumps over” will be highlighted in yellow. A message box will confirm that potential plagiarism blocks were highlighted. The Immediate Window will show: Possible plagiarism detected (length 5): quick brown fox jumps over
Notes and Limitations File Names: Update the document names in the code to match your actual file names. Minimum Match Length: The minMatchLen variable is set to 3 words. Adjust it (e.g., to 4 or 5) to make the detection more or less strict. Performance: The program may be slow for very large documents due to the sliding window comparison. For optimization, consider limiting the text range or using a more advanced algorithm. Language: The code is designed for English text. For other languages (e.g., Chinese), you’d need to modify the TokenizeText function to handle different word boundaries. Punctuation: Punctuation is ignored except for apostrophes. If needed, modify IsLetterOrDigit to include other characters. Highlighting: The code highlights the first occurrence of each matched block in Document B. If the same block appears multiple times, only the first is highlighted. To highlight all instances, modify the .Execute loop to continue searching. Improvements File Input: Add a dialog to select documents instead of hardcoding names. Advanced Matching: Incorporate fuzzy matching or synonym detection for more sophisticated plagiarism checks. Output Report: Generate a detailed report in a new Word document listing all matches. Chinese Support: If the documents are in Chinese, use a library or API (e.g., via Python integration) to segment words, as VBA lacks native Chinese tokenization. If you need help with any of these improvements, specific document names, or testing, let me know!
m
mozartyell
' Word文档剽窃检测与标识程序 ' 使用方法: 在Word中按Alt+F11打开VBA编辑器,插入新模块,粘贴此代码
Option Explicit
' One run of matching words, as recorded by FindMatches.
Type MatchInfo
    StartPos As Long     ' index in document B's word array where the run starts
    Length As Long       ' number of matching words
    MatchText As String  ' the matching words joined by single spaces
    SourcePos As Long    ' index in document A's word array where the run starts
End Type
' Entry point: lets the user pick an original document (A) and a document
' to check (B), finds word blocks of B that also occur in A, highlights
' them in B and shows a summary.
'
' The completion message is shown only when something was highlighted, and
' screen updating and the status bar are restored even when an error occurs.
Sub DetectAndHighlightPlagiarism()
    Dim docA As Document, docB As Document
    Dim textA As String, textB As String
    Dim wordsA() As String, wordsB() As String
    Dim matches() As MatchInfo
    Dim matchCount As Long
    Dim errNumber As Long
    Dim errText As String

    ' Two open documents are required.
    If Documents.Count < 2 Then
        MsgBox "请至少打开两个Word文档!" & vbCrLf & _
               "文档A (原始文档) 和文档B (待检测文档)", vbExclamation
        Exit Sub
    End If

    ' Let the user choose both documents.
    Set docA = SelectDocument("请选择文档A (原始文档):")
    If docA Is Nothing Then Exit Sub

    Set docB = SelectDocument("请选择文档B (待检测文档):")
    If docB Is Nothing Then Exit Sub

    If docA Is docB Then
        MsgBox "请选择两个不同的文档!", vbExclamation
        Exit Sub
    End If

    On Error GoTo CleanUp
    Application.ScreenUpdating = False
    Application.StatusBar = "正在分析文档..."

    ' Extract, normalize and split the text of both documents.
    textA = NormalizeText(GetDocumentText(docA))
    textB = NormalizeText(GetDocumentText(docB))
    wordsA = Split(textA, " ")
    wordsB = Split(textB, " ")

    Application.StatusBar = "正在查找相似内容..."
    matchCount = FindMatches(wordsA, wordsB, matches)

    If matchCount > 0 Then
        HighlightMatches docB, matches, matchCount
    End If

CleanUp:
    ' Capture the error state before anything else can change it.
    errNumber = Err.Number
    errText = Err.Description
    On Error GoTo 0

    Application.StatusBar = ""
    Application.ScreenUpdating = True

    If errNumber <> 0 Then
        MsgBox "检测过程中出错: " & errText, vbCritical
        Exit Sub
    End If

    If matchCount > 0 Then
        ShowResults docA, docB, matches, matchCount, wordsB
        MsgBox "检测完成!相似内容已在文档B中用红色高亮标出。", vbInformation
    Else
        MsgBox "未发现明显的相似内容。", vbInformation
    End If
End Sub
' Shows a numbered list of the open documents and returns the one whose
' number the user types.
'
' prompt : text shown above the list.
' Returns Nothing when the dialog is cancelled or left empty, or when the
' input is not a whole number between 1 and Documents.Count (a warning is
' shown in the latter cases).
Function SelectDocument(prompt As String) As Document
    Dim docNames As String
    Dim selectedName As String
    Dim choice As Double
    Dim i As Long

    Set SelectDocument = Nothing

    ' Build the numbered list of document names.
    docNames = ""
    For i = 1 To Documents.Count
        docNames = docNames & i & ". " & Documents(i).Name & vbCrLf
    Next i

    selectedName = InputBox(prompt & vbCrLf & vbCrLf & docNames & vbCrLf & "请输入文档编号:", "选择文档")
    If selectedName = "" Then Exit Function

    If Not IsNumeric(selectedName) Then
        MsgBox "请输入有效的数字!", vbExclamation
        Exit Function
    End If

    ' Compare as Double: converting straight to Integer raises an overflow
    ' error for large input and silently rounds fractions.
    choice = CDbl(selectedName)
    If choice = Int(choice) And choice >= 1 And choice <= Documents.Count Then
        Set SelectDocument = Documents(CLng(choice))
    Else
        MsgBox "无效的文档编号!", vbExclamation
    End If
End Function
' Returns the text of doc with every carriage return, line feed and tab
' replaced by a space.
Function GetDocumentText(doc As Document) As String
    Dim content As String
    Dim separators As Variant
    Dim sep As Variant

    content = doc.Content.Text

    separators = Array(vbCr, vbLf, vbTab)
    For Each sep In separators
        content = Replace(content, sep, " ")
    Next sep

    GetDocumentText = content
End Function
' Lowercases text and reduces it to words separated by single spaces.
'
' Only the characters a-z and 0-9 are kept; every run of other characters
' becomes one space. The result has no leading, trailing or doubled spaces,
' which is what Split(result, " ") needs to yield one element per word.
Function NormalizeText(text As String) As String
    Dim lowered As String
    Dim buffer As String
    Dim i As Long
    Dim ch As String
    Dim isWordChar As Boolean

    lowered = LCase$(text)

    For i = 1 To Len(lowered)
        ch = Mid$(lowered, i, 1)
        isWordChar = (ch >= "a" And ch <= "z") Or (ch >= "0" And ch <= "9")

        If isWordChar Then
            buffer = buffer & ch
        ElseIf Len(buffer) > 0 And Right$(buffer, 1) <> " " Then
            ' The first separator after a word adds one space; further
            ' separators add nothing, so no doubled space ever arises and
            ' no collapsing loop is needed afterwards.
            buffer = buffer & " "
        End If
    Next i

    NormalizeText = Trim$(buffer)
End Function
' Finds word blocks of document B that also occur in document A.
'
' wordsA, wordsB : word arrays of the two documents.
' matches        : output; (re)dimensioned here and grown as needed. Only
'                  the first FindMatches() elements are meaningful.
' Returns the number of matches stored.
'
' For each position in B the LONGEST run shared with any position in A is
' taken (the first A position wins ties); runs shorter than three words are
' ignored. After a match the scan resumes behind it, so recorded blocks do
' not overlap in B. Empty words never take part in a run.
Function FindMatches(wordsA() As String, wordsB() As String, matches() As MatchInfo) As Long
    Const MIN_CHUNK_SIZE As Long = 3

    Dim i As Long, j As Long, k As Long
    Dim matchCount As Long
    Dim runLength As Long
    Dim bestLength As Long
    Dim bestSource As Long
    Dim matchText As String

    matchCount = 0
    ReDim matches(0 To 63)

    i = LBound(wordsB)
    Do While i <= UBound(wordsB)
        bestLength = 0

        If Len(wordsB(i)) > 0 Then
            For j = LBound(wordsA) To UBound(wordsA)
                ' Length of the common run starting at (j, i). The bounds
                ' test stays in the loop condition and the element tests in
                ' the body because VBA's And does not short-circuit.
                runLength = 0
                Do While i + runLength <= UBound(wordsB) And j + runLength <= UBound(wordsA)
                    If Len(wordsB(i + runLength)) = 0 Then Exit Do
                    If wordsA(j + runLength) <> wordsB(i + runLength) Then Exit Do
                    runLength = runLength + 1
                Loop

                If runLength > bestLength Then
                    bestLength = runLength
                    bestSource = j
                End If
            Next j
        End If

        If bestLength >= MIN_CHUNK_SIZE Then
            ' Grow the output array instead of overrunning a fixed size.
            If matchCount > UBound(matches) Then
                ReDim Preserve matches(0 To 2 * UBound(matches) + 1)
            End If

            matchText = ""
            For k = 0 To bestLength - 1
                If k > 0 Then matchText = matchText & " "
                matchText = matchText & wordsB(i + k)
            Next k

            matches(matchCount).StartPos = i
            matches(matchCount).Length = bestLength
            matches(matchCount).SourcePos = bestSource
            matches(matchCount).MatchText = matchText
            matchCount = matchCount + 1

            i = i + bestLength ' continue behind the matched words
        Else
            i = i + 1
        End If
    Loop

    FindMatches = matchCount
End Function
' Highlights the text of each recorded match in docB.
'
' docB       : document to mark up.
' matches    : matches produced by FindMatches.
' matchCount : number of valid elements at the start of matches.
Sub HighlightMatches(docB As Document, matches() As MatchInfo, matchCount As Long)
    Dim i As Long

    ' The search runs on each match's text, so the document does not have
    ' to be normalized and split again here.
    For i = 0 To matchCount - 1
        HighlightTextInDocument docB, matches(i).MatchText
    Next i
End Sub
' Highlights every occurrence of searchText in doc and colours it red.
'
' The search ignores case. Find.Text accepts at most 255 characters, so a
' longer searchText is shortened to the last whole word that fits and only
' that leading part is marked. An empty searchText does nothing.
Sub HighlightTextInDocument(doc As Document, searchText As String)
    Const MAX_FIND_LEN As Long = 255

    Dim rng As Range
    Dim findText As String
    Dim cutAt As Long

    findText = searchText
    If Len(findText) = 0 Then Exit Sub

    If Len(findText) > MAX_FIND_LEN Then
        cutAt = InStrRev(findText, " ", MAX_FIND_LEN + 1)
        If cutAt > 1 Then
            findText = Left$(findText, cutAt - 1)
        Else
            findText = Left$(findText, MAX_FIND_LEN)
        End If
    End If

    Set rng = doc.Content

    ' Find settings persist between searches, so every option this search
    ' depends on is set explicitly.
    With rng.Find
        .ClearFormatting
        .Replacement.ClearFormatting
        .Text = findText
        .Replacement.Text = "^&" ' re-insert the found text unchanged
        .Replacement.Highlight = True
        .Replacement.Font.Color = RGB(255, 0, 0) ' red
        .Forward = True
        .Wrap = wdFindContinue
        .Format = True ' apply the replacement formatting
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .Execute Replace:=wdReplaceAll
    End With
End Sub
' Shows a summary of the detection in a message box: both document names,
' each match (text cut to 100 characters) with its length in words, and the
' share of document B's words that lie inside a match.
'
' docA, docB : original document and checked document.
' matches    : matches produced by FindMatches.
' matchCount : number of valid elements at the start of matches.
' wordsB     : word array of document B; its size is the similarity base.
Sub ShowResults(docA As Document, docB As Document, matches() As MatchInfo, matchCount As Long, wordsB() As String)
    Dim result As String
    Dim i As Long
    Dim totalWords As Long
    Dim matchedWords As Long
    Dim similarity As Double

    result = "文档剽窃检测结果" & vbCrLf & String(30, "=") & vbCrLf & vbCrLf
    result = result & "原始文档: " & docA.Name & vbCrLf
    result = result & "检测文档: " & docB.Name & vbCrLf & vbCrLf

    If matchCount <= 0 Then
        result = result & "未发现明显的相似内容。"
    Else
        result = result & "发现 " & matchCount & " 个相似词块:" & vbCrLf & vbCrLf

        For i = 0 To matchCount - 1
            result = result & "词块 " & (i + 1) & ":" & vbCrLf
            result = result & " 内容: """ & Left(matches(i).MatchText, 100)
            If Len(matches(i).MatchText) > 100 Then result = result & "..."
            result = result & """" & vbCrLf
            result = result & " 长度: " & matches(i).Length & " 个词" & vbCrLf & vbCrLf

            matchedWords = matchedWords + matches(i).Length
        Next i

        ' Similarity = matched words / all words of document B.
        totalWords = UBound(wordsB) - LBound(wordsB) + 1
        If totalWords > 0 Then
            similarity = (matchedWords / totalWords) * 100
            result = result & "总体相似度: " & Format(similarity, "0.00") & "%" & vbCrLf

            If similarity > 30 Then
                result = result & vbCrLf & "警告: 相似度较高,可能存在剽窃!"
            End If
        End If
    End If

    MsgBox result, vbInformation, "检测结果"
End Sub
' Removes highlighting from the whole active document.
' Font colours are left untouched.
Sub ClearAllHighlights()
    If Documents.Count = 0 Then
        MsgBox "请先打开一个文档!", vbExclamation
        Exit Sub
    End If

    ' Setting the highlight of the full range directly does not depend on
    ' whatever Find/Replace options were used last.
    ActiveDocument.Content.HighlightColorIndex = wdNoHighlight

    MsgBox "已清除所有高亮标记。", vbInformation
End Sub
m
mozartyell
我来帮你解决这两个文本相似性检测的问题。 1. C语言版本 - 检测文本剽窃## 2. VBA版本 - Word文档剽窃检测与标识## 使用说明 C语言程序使用方法: 将代码保存为 plagiarism_detector.c 编译:gcc plagiarism_detector.c -o plagiarism_detector 运行:./plagiarism_detector 选择输入方式(文件或手动输入) 程序会自动分析并显示可能的剽窃词块 主要功能: 文本规范化(去除标点,转小写) 基于滑动窗口的连续词块匹配 可调整最小匹配长度(默认3个词) 计算相似度百分比 支持文件输入和手动输入 VBA程序使用方法: 打开两个Word文档(文档A和文档B) 按 Alt + F11 打开VBA编辑器 插入新模块,粘贴VBA代码 按 F5 运行 DetectAndHighlightPlagiarism 函数 按提示选择原始文档和待检测文档 程序会自动检测并在文档B中用红色高亮标出相似内容 主要功能: 智能文档选择界面 文本预处理和词汇化 连续词块匹配算法 自动在Word文档中高亮标识相似内容 详细的检测报告和相似度统计 提供清除高亮的辅助函数 算法特点: 准确性:使用连续词块匹配,避免单词偶然重复的误报 灵活性:可调整最小匹配长度阈值 实用性:C版本适合批量处理,VBA版本集成Word环境 可视化:VBA版本直接在文档中标识问题区域 这两个程序都能有效检测文本间的相似性,帮助识别可能的剽窃行为。
g
gooog
都不对。 不是按照一个词一个词去比较。而是词块去比较。
m
mozartyell
gooog 发表于 2025-05-29 22:53
都不对。 不是按照一个词一个词去比较。而是词块去比较。

那你可以跟AI提意见啊,马上就有新的版本。
你不说清楚你想要什么,凭什么去骂你的手下不知道怎么去揣测你的意图?你这种人就算当真人的领导也不会是个好领导。
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h>
// Buffer sizes and chunk limits. One directive per line: a #define extends
// to the end of its line, so collapsing them defines only the first.
#define MAX_TEXT_SIZE 10000  // bytes reserved for each input text, terminator included
#define MAX_WORD_LEN 100     // bytes reserved per word, terminator included
#define MIN_CHUNK_SIZE 3     // smallest chunk size, in words
#define MAX_CHUNK_SIZE 10    // largest chunk size, in words
#define MAX_CHUNKS 1000      // maximum number of chunks (and of matches)
// A window of consecutive words taken from one text.
typedef struct {
    char words[MAX_CHUNK_SIZE][MAX_WORD_LEN]; // the chunk's words, in order
    int word_count;                           // number of rows of `words` in use
    int start_pos;                            // zero-based index of the first word in the source word array
} Chunk;
// One pair of similar chunks found by find_chunk_matches().
typedef struct {
    int chunk_a_index;  // index into the chunk array of text A
    int chunk_b_index;  // index into the chunk array of text B
    int chunk_size;     // number of words in the matched chunk
    char matched_text[MAX_WORD_LEN * MAX_CHUNK_SIZE]; // text B's chunk, words joined by single spaces
    float similarity;   // fraction of word positions that are equal, 0.0 to 1.0
} Match;
// Lowercase `text` in place, turn punctuation into spaces and collapse
// runs of spaces.
//
// Letters and digits are kept (letters lowercased); tabs and line breaks
// are kept unchanged. A blank is written unless the output already ends in
// one. Any other byte becomes a blank under the same rule, except that
// nothing is written while the output is still empty. The result is never
// longer than the input, so no temporary copy is needed.
void normalize_text(char *text) {
    size_t out = 0;

    for (size_t in = 0; text[in] != '\0'; in++) {
        // <ctype.h> functions need a value representable as unsigned char.
        unsigned char c = (unsigned char)text[in];

        if (isalnum(c)) {
            text[out++] = (char)tolower(c);
        } else if (c != ' ' && isspace(c)) {
            text[out++] = (char)c;
        } else if (c == ' ') {
            if (out == 0 || text[out - 1] != ' ') {
                text[out++] = ' ';
            }
        } else if (out > 0 && text[out - 1] != ' ') {
            text[out++] = ' '; // punctuation becomes a separator
        }
    }
    text[out] = '\0';
}
// Split `text` in place on blanks, tabs and line breaks.
//
// text  : NUL-terminated buffer; strtok() overwrites its separators.
// words : output table with room for at least MAX_TEXT_SIZE / 10 rows.
// Returns the number of words stored (at most MAX_TEXT_SIZE / 10).
//
// A token longer than MAX_WORD_LEN - 1 bytes is truncated so it cannot
// overflow its row.
int split_words(char *text, char words[][MAX_WORD_LEN]) {
    static const char separators[] = " \t\n\r";
    const int capacity = MAX_TEXT_SIZE / 10;
    int count = 0;

    char *token = strtok(text, separators);
    while (token != NULL && count < capacity) {
        // strtok() never returns an empty token, so every token is a word.
        snprintf(words[count], MAX_WORD_LEN, "%s", token);
        count++;
        token = strtok(NULL, separators);
    }
    return count;
}
// Build every chunk of MIN_CHUNK_SIZE to MAX_CHUNK_SIZE consecutive words.
//
// words, word_count : source words and how many of them are valid.
// chunks            : output array with room for MAX_CHUNKS elements.
// Returns the number of chunks written (at most MAX_CHUNKS).
//
// Chunks are produced size by size, smallest first, each size from left to
// right; generation stops as soon as MAX_CHUNKS chunks exist.
int generate_chunks(char words[][MAX_WORD_LEN], int word_count, Chunk chunks[]) {
    int chunk_count = 0;

    for (int size = MIN_CHUNK_SIZE; size <= MAX_CHUNK_SIZE; size++) {
        for (int start = 0; start + size <= word_count; start++) {
            if (chunk_count >= MAX_CHUNKS) {
                return chunk_count;
            }

            Chunk *chunk = &chunks[chunk_count++];
            chunk->word_count = size;
            chunk->start_pos = start;

            // Copy each word into its own row of the chunk.
            for (int w = 0; w < size; w++) {
                snprintf(chunk->words[w], sizeof chunk->words[w], "%s",
                         words[start + w]);
            }
        }
    }
    return chunk_count;
}
// Fraction of word positions at which two chunks hold the same word.
//
// Returns 1.0 for identical chunks, a proportional value for partial
// agreement, and 0.0 when the chunks differ in size or are empty.
float calculate_chunk_similarity(Chunk *chunk_a, Chunk *chunk_b) {
    int total_words = chunk_a->word_count;

    // Only equally sized, non-empty chunks are comparable; this also keeps
    // the division below away from zero.
    if (total_words != chunk_b->word_count || total_words <= 0) {
        return 0.0f;
    }

    int matched_words = 0;
    for (int i = 0; i < total_words; i++) {
        if (strcmp(chunk_a->words[i], chunk_b->words[i]) == 0) {
            matched_words++;
        }
    }
    return (float)matched_words / total_words;
}
// Fill one Match record from chunk `b` of text B; matched_text is built
// with bounded appends and truncated if it would not fit.
static void record_match(Match *m, int a_index, int b_index,
                         const Chunk *b, float similarity) {
    m->chunk_a_index = a_index;
    m->chunk_b_index = b_index;
    m->chunk_size = b->word_count;
    m->similarity = similarity;

    size_t used = 0;
    m->matched_text[0] = '\0';
    for (int w = 0; w < b->word_count; w++) {
        size_t room = sizeof m->matched_text - used;
        int n = snprintf(m->matched_text + used, room, "%s%s",
                         w > 0 ? " " : "", b->words[w]);
        if (n < 0 || (size_t)n >= room) {
            break;
        }
        used += (size_t)n;
    }
}

// Match the chunks of text B against the chunks of text A.
//
// chunks_a, count_a    : chunks of text A.
// chunks_b, count_b    : chunks of text B.
// matches              : output array with room for MAX_CHUNKS elements.
// similarity_threshold : minimum similarity (0.0 to 1.0) for a match.
// Returns the number of matches stored.
//
// For every B chunk the most similar A chunk at or above the threshold is
// taken (the first one wins ties). A candidate whose word range in B
// overlaps an already recorded match does not add a record; it replaces
// the first such record when it is more similar, or equally similar but
// longer. Overlap is decided on word positions (start_pos and word_count),
// not on chunk array indices.
int find_chunk_matches(Chunk chunks_a[], int count_a, Chunk chunks_b[], int count_b,
                       Match matches[], float similarity_threshold) {
    int match_count = 0;

    for (int i = 0; i < count_b && match_count < MAX_CHUNKS; i++) {
        // Best counterpart in text A for this chunk.
        int best_j = -1;
        float best = 0.0f;
        for (int j = 0; j < count_a; j++) {
            float s = calculate_chunk_similarity(&chunks_a[j], &chunks_b[i]);
            if (s >= similarity_threshold && (best_j < 0 || s > best)) {
                best_j = j;
                best = s;
            }
        }
        if (best_j < 0) {
            continue;
        }

        // Word range [b_start, b_end) covered by this chunk in text B.
        int b_start = chunks_b[i].start_pos;
        int b_end = b_start + chunks_b[i].word_count;

        int overlaps = 0;
        for (int k = 0; k < match_count; k++) {
            const Chunk *prev = &chunks_b[matches[k].chunk_b_index];
            int p_start = prev->start_pos;
            int p_end = p_start + prev->word_count;

            if (b_start < p_end && p_start < b_end) {
                int better = best > matches[k].similarity ||
                             (best == matches[k].similarity &&
                              chunks_b[i].word_count > matches[k].chunk_size);
                if (better) {
                    record_match(&matches[k], best_j, i, &chunks_b[i], best);
                }
                overlaps = 1;
                break;
            }
        }

        if (!overlaps) {
            record_match(&matches[match_count], best_j, i, &chunks_b[i], best);
            match_count++;
        }
    }
    return match_count;
}
// qsort() comparator: orders Match records by descending similarity.
static int by_similarity_desc(const void *lhs, const void *rhs) {
    float a = ((const Match *)lhs)->similarity;
    float b = ((const Match *)rhs)->similarity;
    return (a < b) - (a > b);
}

// Sort `matches` in place so that the most similar match comes first.
//
// matches : array of `count` records.
// count   : number of records; values below 2 leave the array untouched.
// The relative order of records with equal similarity is unspecified.
void sort_matches_by_similarity(Match matches[], int count) {
    if (count < 2) {
        return;
    }
    qsort(matches, (size_t)count, sizeof matches[0], by_similarity_desc);
}
// Read at most MAX_TEXT_SIZE - 1 bytes of `filename` into `text` and
// NUL-terminate the result right after the bytes read.
//
// filename : path of the file to read.
// text     : destination buffer of at least MAX_TEXT_SIZE bytes.
// Returns 1 on success, 0 if the file cannot be opened or read (a message
// is printed to stdout in both failure cases).
int read_file(const char *filename, char *text) {
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        printf("无法打开文件: %s\n", filename);
        return 0;
    }

    size_t bytes_read = fread(text, sizeof(char), MAX_TEXT_SIZE - 1, file);
    // A short count can mean end of file or an error; ask which one.
    int read_failed = ferror(file);
    text[bytes_read] = '\0';
    fclose(file);

    if (read_failed) {
        printf("读取文件失败: %s\n", filename);
        return 0;
    }
    return 1;
}
int main() {    char text_a[MAX_TEXT_SIZE], text_b[MAX_TEXT_SIZE];    char words_a[MAX_TEXT_SIZE/10][MAX_WORD_LEN];    char words_b[MAX_TEXT_SIZE/10][MAX_WORD_LEN];    Chunk chunks_a[MAX_CHUNKS], chunks_b[MAX_CHUNKS];    Match matches[MAX_CHUNKS];
   printf("=== 基于词块的文本剽窃检测程序 ===\n\n");
   // 输入方式选择    printf("请选择输入方式:\n");    printf("1. 从文件读取\n");    printf("2. 手动输入\n");    printf("请选择 (1-2): ");
   int choice;    scanf("%d", &choice);    getchar();
   if (choice == 1) {        char filename_a[100], filename_b[100];        printf("请输入文本A的文件名: ");        fgets(filename_a, sizeof(filename_a), stdin);        filename_a[strcspn(filename_a, "\n")] = 0;
       printf("请输入文本B的文件名: ");        fgets(filename_b, sizeof(filename_b), stdin);        filename_b[strcspn(filename_b, "\n")] = 0;
       if (!read_file(filename_a, text_a) || !read_file(filename_b, text_b)) {            return 1;        }    } else {        printf("\n请输入文本A (按回车结束):\n");        fgets(text_a, sizeof(text_a), stdin);
       printf("\n请输入文本B (按回车结束):\n");        fgets(text_b, sizeof(text_b), stdin);    }
   // 设置相似度阈值    float threshold;    printf("\n请输入相似度阈值 (0.0-1.0, 推荐0.8): ");    scanf("%f", &threshold);    if (threshold < 0.0 || threshold > 1.0) threshold = 0.8;
   printf("\n正在处理文本...\n");
   // 规范化文本    normalize_text(text_a);    normalize_text(text_b);
   // 分割单词    int word_count_a = split_words(text_a, words_a);    int word_count_b = split_words(text_b, words_b);
   printf("文本A包含 %d 个词\n", word_count_a);    printf("文本B包含 %d 个词\n", word_count_b);
   // 生成词块    printf("正在生成词块...\n");    int chunk_count_a = generate_chunks(words_a, word_count_a, chunks_a);    int chunk_count_b = generate_chunks(words_b, word_count_b, chunks_b);
   printf("文本A生成 %d 个词块\n", chunk_count_a);    printf("文本B生成 %d 个词块\n", chunk_count_b);
   // 查找匹配词块    printf("正在匹配词块...\n");    int match_count = find_chunk_matches(chunks_a, chunk_count_a, chunks_b, chunk_count_b,                                        matches, threshold);
   // 排序结果    sort_matches_by_similarity(matches, match_count);
   // 输出结果    printf("\n" + (char*)50);    if (match_count == 0) {        printf("未发现相似度超过 %.2f 的词块\n", threshold);    } else {        printf("发现 %d 个相似词块 (相似度 >= %.2f):\n\n", match_count, threshold);
       int total_matched_words = 0;        for (int i = 0; i < match_count; i++) {            printf("词块 %d:\n", i + 1);            printf(" 内容: \"%s\"\n", matches.matched_text);            printf(" 大小: %d 个词\n", matches.chunk_size);            printf(" 相似度: %.2f%%\n", matches.similarity * 100);            printf(" 在文本A中位置: 第 %d 个词开始\n",                   chunks_a[matches.chunk_a_index].start_pos + 1);            printf(" 在文本B中位置: 第 %d 个词开始\n",                   chunks_b[matches.chunk_b_index].start_pos + 1);            printf("\n");
           total_matched_words += matches.chunk_size;        }
       // 计算总体相似度        float overall_similarity = (float)total_matched_words / word_count_b * 100;        printf("总体相似度: %.2f%% (基于匹配词数)\n", overall_similarity);
       if (overall_similarity > 30) {            printf("⚠️ 警告: 相似度较高,可能存在剽窃!\n");        } else if (overall_similarity > 15) {            printf("⚠️ 注意: 发现一定程度的相似性\n");        }
       // 统计不同大小词块的匹配情况        printf("\n词块大小分布:\n");        int size_stats[MAX_CHUNK_SIZE + 1] = {0};        for (int i = 0; i < match_count; i++) {            size_stats[matches.chunk_size]++;        }
       for (int i = MIN_CHUNK_SIZE; i <= MAX_CHUNK_SIZE; i++) {            if (size_stats > 0) {                printf(" %d词块: %d个\n", i, size_stats);            }        }    }
   return 0; }
m
mozartyell
现在的AI是你一个最为好使的手下,从来不会顶撞你,能力又特别强,有这样的帮手你还要怎样?真不知道这些不appreciate AI的人,在现实中是多么令人讨厌的人。
i
istio

.
m
mozartyell
我发现反AI的人有几个特点:1.思维极其简单肤浅,缺乏nuance,非黑即白。 2.安全感极低(因为自己能力差),害怕AI取代自己。 3.对人类社会科技发展的历史缺乏认识,其实就是当年工业革命的卢德派。
g
gooog
mozartyell 发表于 2025-05-29 22:57
现在的AI是你一个最为好使的手下,从来不会顶撞你,能力又特别强,有这样的帮手你还要怎样?真不知道这些不appreciate AI的人,在现实中是多么令人讨厌的人。

上面这个代码还不对。 词块是变长度的,不是等长度。可能是3,但可能是4,5,........1000.
m
mozartyell
gooog 发表于 2025-05-29 23:04
上面这个代码还不对。 词块是变长度的,不是等长度。可能是3,但可能是4,5,........1000.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Each preprocessor directive must sit on its own line. */
#define MAX_TEXT_SIZE 50000   /* byte capacity of each input text buffer */
#define MAX_WORD_LEN 100      /* byte capacity of one stored word, NUL included */
#define MIN_CHUNK_SIZE 3      /* minimum chunk size, in words */
#define MAX_MATCHES 1000      /* maximum number of matches recorded */
/* One variable-length run of words found in both text A and text B. */
typedef struct {
    int start_a;          /* start position in text A (word index) */
    int start_b;          /* start position in text B (word index) */
    int length;           /* chunk length in words (variable) */
    char *matched_text;   /* text of the matched words */
} VariableMatch;
/*
 * Normalize text in place: lower-case every alphanumeric character and
 * replace each run of other characters with a single space. Leading and
 * trailing separators are dropped, so the result is words joined by exactly
 * one space (or the empty string).
 *
 * text: NUL-terminated, writable string; the result is never longer than
 *       the input.
 *
 * Characters go through <ctype.h> as unsigned char, which the classification
 * functions require. Done in a single pass, so no scratch buffer is needed.
 */
void normalize_text(char *text) {
    size_t out = 0;

    for (size_t in = 0; text[in] != '\0'; in++) {
        unsigned char c = (unsigned char)text[in];
        if (isalnum(c)) {
            text[out++] = (char)tolower(c);
        } else if (out > 0 && text[out - 1] != ' ') {
            /* first separator after a word: emit one space, skip the rest */
            text[out++] = ' ';
        }
    }

    /* at most one trailing space can be left over */
    if (out > 0 && text[out - 1] == ' ') {
        out--;
    }
    text[out] = '\0';
}
// 分割文本为单词数组 int split_words(char *text, char words[][MAX_WORD_LEN]) {    int count = 0;    char *text_copy = malloc(strlen(text) + 1);    strcpy(text_copy, text);
   char *token = strtok(text_copy, " ");    while (token != NULL && count < MAX_TEXT_SIZE/10) {        if (strlen(token) > 0) {            strcpy(words[count], token);            count++;        }        token = strtok(NULL, " ");    }
   free(text_copy);    return count; }
// 从指定位置开始计算最大连续匹配长度(贪心扩展) int calculate_max_match_length(char words_a[][MAX_WORD_LEN], int count_a, int pos_a,                              char words_b[][MAX_WORD_LEN], int count_b, int pos_b) {    int length = 0;
   // 向前扩展,尽可能找到最长的连续匹配    while (pos_a + length < count_a &&           pos_b + length < count_b &&           strcmp(words_a[pos_a + length], words_b[pos_b + length]) == 0) {        length++;    }
   return length; }
// 构建匹配文本字符串 char* build_match_text(char words[][MAX_WORD_LEN], int start_pos, int length) {    int total_len = 0;
   // 计算所需的总长度    for (int i = 0; i < length; i++) {        total_len += strlen(words[start_pos + i]) + 1; // +1 for space    }
   char *result = malloc(total_len + 1);    result[0] = '\0';
   for (int i = 0; i < length; i++) {        if (i > 0) strcat(result, " ");        strcat(result, words[start_pos + i]);    }
   return result; }
// 检查是否与现有匹配重叠 int is_overlapping(VariableMatch *matches, int match_count, int start_b, int length) {    for (int i = 0; i < match_count; i++) {        int existing_start = matches.start_b;        int existing_end = matches.start_b + matches.length - 1;        int new_start = start_b;        int new_end = start_b + length - 1;
       // 检查是否重叠        if (!(new_end < existing_start || new_start > existing_end)) {            return 1; // 有重叠        }    }    return 0; // 无重叠 }
// 变长度词块匹配算法(核心算法) int find_variable_length_matches(char words_a[][MAX_WORD_LEN], int count_a,                                 char words_b[][MAX_WORD_LEN], int count_b,                                 VariableMatch *matches) {    int match_count = 0;
   printf("开始变长度词块检测...\n");    printf("正在扫描文本B的每个位置...\n");
   // 遍历文本B的每个位置    for (int i = 0; i < count_b && match_count < MAX_MATCHES; i++) {        if (i % 100 == 0) {            printf("进度: %d/%d (%.1f%%)\r", i, count_b, (float)i/count_b*100);            fflush(stdout);        }
       int max_length = 0;        int best_match_pos = -1;
       // 在文本A中查找当前词的所有出现位置        for (int j = 0; j < count_a; j++) {            if (strcmp(words_b, words_a[j]) == 0) {                // 找到匹配的起始词,计算从这里开始的最大连续匹配长度                int length = calculate_max_match_length(words_a, count_a, j, words_b, count_b, i);
               // 如果这个匹配更长,更新最佳匹配                if (length > max_length) {                    max_length = length;                    best_match_pos = j;                }            }        }
       // 如果找到足够长的匹配且不与现有匹配重叠        if (max_length >= MIN_CHUNK_SIZE &&            !is_overlapping(matches, match_count, i, max_length)) {
           matches[match_count].start_a = best_match_pos;            matches[match_count].start_b = i;            matches[match_count].length = max_length;            matches[match_count].matched_text = build_match_text(words_b, i, max_length);
           match_count++;
           // 跳过已匹配的词,避免重复检测            i += max_length - 1;        }    }
   printf("\n检测完成!\n");    return match_count; }
// 按匹配长度排序(长的在前) void sort_matches_by_length(VariableMatch *matches, int count) {    for (int i = 0; i < count - 1; i++) {        for (int j = i + 1; j < count; j++) {            if (matches.length < matches[j].length) {                VariableMatch temp = matches;                matches = matches[j];                matches[j] = temp;            }        }    } }
// 从文件读取文本 int read_file(const char *filename, char *text) {    FILE *file = fopen(filename, "r");    if (file == NULL) {        printf("无法打开文件: %s\n", filename);        return 0;    }
   size_t bytes_read = fread(text, sizeof(char), MAX_TEXT_SIZE - 1, file);    text[bytes_read] = '\0';    fclose(file);    return 1; }
// 释放匹配结果的内存 void free_matches(VariableMatch *matches, int count) {    for (int i = 0; i < count; i++) {        if (matches.matched_text != NULL) {            free(matches.matched_text);        }    } }
int main() {    char *text_a = malloc(MAX_TEXT_SIZE);    char *text_b = malloc(MAX_TEXT_SIZE);    char (*words_a)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    char (*words_b)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    VariableMatch *matches = malloc(sizeof(VariableMatch) * MAX_MATCHES);
   if (!text_a || !text_b || !words_a || !words_b || !matches) {        printf("内存分配失败!\n");        return 1;    }
   printf("=== 变长度词块剽窃检测程序 ===\n\n");    printf("本程序使用贪心算法自动发现任意长度的相似词块\n");    printf("词块长度范围:%d 词 到 无限制\n\n", MIN_CHUNK_SIZE);
   // 输入方式选择    printf("请选择输入方式:\n");    printf("1. 从文件读取\n");    printf("2. 手动输入\n");    printf("请选择 (1-2): ");
   int choice;    scanf("%d", &choice);    getchar();
   if (choice == 1) {        char filename_a[200], filename_b[200];        printf("请输入文本A的文件名: ");        fgets(filename_a, sizeof(filename_a), stdin);        filename_a[strcspn(filename_a, "\n")] = 0;
       printf("请输入文本B的文件名: ");        fgets(filename_b, sizeof(filename_b), stdin);        filename_b[strcspn(filename_b, "\n")] = 0;
       if (!read_file(filename_a, text_a) || !read_file(filename_b, text_b)) {            goto cleanup;        }    } else {        printf("\n请输入文本A:\n");        fgets(text_a, MAX_TEXT_SIZE, stdin);
       printf("\n请输入文本B:\n");        fgets(text_b, MAX_TEXT_SIZE, stdin);    }
   printf("\n正在预处理文本...\n");
   // 规范化文本    normalize_text(text_a);    normalize_text(text_b);
   // 分割单词    int word_count_a = split_words(text_a, words_a);    int word_count_b = split_words(text_b, words_b);
   printf("文本A: %d 个词\n", word_count_a);    printf("文本B: %d 个词\n", word_count_b);
   if (word_count_a == 0 || word_count_b == 0) {        printf("错误: 文本为空或无有效词汇\n");        goto cleanup;    }
   // 执行变长度匹配    int match_count = find_variable_length_matches(words_a, word_count_a,                                                  words_b, word_count_b, matches);
   // 排序结果    sort_matches_by_length(matches, match_count);
   // 输出结果    printf("\n");    printf("=" * 60);    printf("\n检测结果\n");    printf("=" * 60);    printf("\n");
   if (match_count == 0) {        printf("未发现长度 >= %d 词的相似词块\n", MIN_CHUNK_SIZE);    } else {        printf("发现 %d 个变长度相似词块:\n\n", match_count);
       int total_matched_words = 0;        int length_stats[21] = {0}; // 统计不同长度的词块数量
       for (int i = 0; i < match_count; i++) {            printf("词块 %d:\n", i + 1);            printf(" 长度: %d 个词\n", matches.length);
           // 显示内容(如果太长则截断)            if (strlen(matches.matched_text) <= 150) {                printf(" 内容: \"%s\"\n", matches.matched_text);            } else {                printf(" 内容: \"%.147s...\"\n", matches.matched_text);            }
           printf(" 文本A位置: 第 %d-%d 个词\n",                   matches.start_a + 1,                   matches.start_a + matches.length);            printf(" 文本B位置: 第 %d-%d 个词\n",                   matches.start_b + 1,                   matches.start_b + matches.length);            printf("\n");
           total_matched_words += matches.length;
           // 统计长度分布            int len_category = matches.length <= 20 ? matches.length : 20;            length_stats[len_category]++;        }
       // 计算覆盖率        float coverage = (float)total_matched_words / word_count_b * 100;        printf("统计信息:\n");        printf(" 匹配词数: %d / %d\n", total_matched_words, word_count_b);        printf(" 覆盖率: %.2f%%\n", coverage);
       // 显示长度分布        printf("\n词块长度分布:\n");        for (int i = MIN_CHUNK_SIZE; i <= 20; i++) {            if (length_stats > 0) {                printf(" %d词块: %d个\n", i, length_stats);            }        }        if (length_stats[20] > 0) {            printf(" 20+词块: %d个\n", length_stats[20]);        }
       // 风险评估        printf("\n风险评估:\n");        if (coverage > 50) {            printf(" 🚨 高风险: 超过50%%的内容可能被剽窃\n");        } else if (coverage > 30) {            printf(" ⚠️ 中风险: 发现显著相似性\n");        } else if (coverage > 15) {            printf(" ⚠️ 低风险: 发现部分相似性\n");        } else {            printf(" ✅ 相似度较低\n");        }
       // 找出最长的匹配        if (match_count > 0) {            printf("\n最长匹配词块: %d 个词\n", matches[0].length);            if (matches[0].length > 20) {                printf(" ⚠️ 发现超长匹配,强烈建议人工审查\n");            }        }    }
cleanup:    free_matches(matches, match_count);    free(text_a);    free(text_b);    free(words_a);    free(words_b);    free(matches);
   return 0; }
g
gooog
mozartyell 发表于 2025-05-29 23:00
我发现反AI的人有几个特点:1.思维极其简单肤浅,缺乏nuance,非黑即白。 2.安全感极低(因为自己能力差),害怕AI取代自己。 3.对人类社会科技发展的历史缺乏认识,其实就是当年工业革命的卢德派。

你把剽窃这个编程看得太容易了。以为AI就能搞定。
AI只能搞定两个文本的字符串匹配。
剽窃,AI根本搞不定!!!
m
mozartyell
gooog 发表于 2025-05-29 23:06
你把剽窃这个编程看得太容易了。以为AI就能搞定。
AI只能搞定两个文本的字符串匹配。
剽窃,AI根本搞不定!!!

呵呵。你想要什么级别的detect剽窃的程序,你需要把你的需求表达清楚,明白吗?
g
gooog
mozartyell 发表于 2025-05-29 23:06
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Each preprocessor directive must sit on its own line. */
#define MAX_TEXT_SIZE 50000   /* byte capacity of each input text buffer */
#define MAX_WORD_LEN 100      /* byte capacity of one stored word, NUL included */
#define MIN_CHUNK_SIZE 3      /* minimum chunk size, in words */
#define MAX_MATCHES 1000      /* maximum number of matches recorded */
/* One variable-length run of words found in both text A and text B. */
typedef struct {
    int start_a;          /* start position in text A (word index) */
    int start_b;          /* start position in text B (word index) */
    int length;           /* chunk length in words (variable) */
    char *matched_text;   /* text of the matched words */
} VariableMatch;
/*
 * Normalize text in place: lower-case every alphanumeric character and
 * replace each run of other characters with a single space. Leading and
 * trailing separators are dropped, so the result is words joined by exactly
 * one space (or the empty string).
 *
 * text: NUL-terminated, writable string; the result is never longer than
 *       the input.
 *
 * Characters go through <ctype.h> as unsigned char, which the classification
 * functions require. Done in a single pass, so no scratch buffer is needed.
 */
void normalize_text(char *text) {
    size_t out = 0;

    for (size_t in = 0; text[in] != '\0'; in++) {
        unsigned char c = (unsigned char)text[in];
        if (isalnum(c)) {
            text[out++] = (char)tolower(c);
        } else if (out > 0 && text[out - 1] != ' ') {
            /* first separator after a word: emit one space, skip the rest */
            text[out++] = ' ';
        }
    }

    /* at most one trailing space can be left over */
    if (out > 0 && text[out - 1] == ' ') {
        out--;
    }
    text[out] = '\0';
}
// 分割文本为单词数组 int split_words(char *text, char words[][MAX_WORD_LEN]) {    int count = 0;    char *text_copy = malloc(strlen(text) + 1);    strcpy(text_copy, text);
   char *token = strtok(text_copy, " ");    while (token != NULL && count < MAX_TEXT_SIZE/10) {        if (strlen(token) > 0) {            strcpy(words[count], token);            count++;        }        token = strtok(NULL, " ");    }
   free(text_copy);    return count; }
// 从指定位置开始计算最大连续匹配长度(贪心扩展) int calculate_max_match_length(char words_a[][MAX_WORD_LEN], int count_a, int pos_a,                              char words_b[][MAX_WORD_LEN], int count_b, int pos_b) {    int length = 0;
   // 向前扩展,尽可能找到最长的连续匹配    while (pos_a + length < count_a &&           pos_b + length < count_b &&           strcmp(words_a[pos_a + length], words_b[pos_b + length]) == 0) {        length++;    }
   return length; }
// 构建匹配文本字符串 char* build_match_text(char words[][MAX_WORD_LEN], int start_pos, int length) {    int total_len = 0;
   // 计算所需的总长度    for (int i = 0; i < length; i++) {        total_len += strlen(words[start_pos + i]) + 1; // +1 for space    }
   char *result = malloc(total_len + 1);    result[0] = '\0';
   for (int i = 0; i < length; i++) {        if (i > 0) strcat(result, " ");        strcat(result, words[start_pos + i]);    }
   return result; }
// 检查是否与现有匹配重叠 int is_overlapping(VariableMatch *matches, int match_count, int start_b, int length) {    for (int i = 0; i < match_count; i++) {        int existing_start = matches.start_b;        int existing_end = matches.start_b + matches.length - 1;        int new_start = start_b;        int new_end = start_b + length - 1;
       // 检查是否重叠        if (!(new_end < existing_start || new_start > existing_end)) {            return 1; // 有重叠        }    }    return 0; // 无重叠 }
// 变长度词块匹配算法(核心算法) int find_variable_length_matches(char words_a[][MAX_WORD_LEN], int count_a,                                 char words_b[][MAX_WORD_LEN], int count_b,                                 VariableMatch *matches) {    int match_count = 0;
   printf("开始变长度词块检测...\n");    printf("正在扫描文本B的每个位置...\n");
   // 遍历文本B的每个位置    for (int i = 0; i < count_b && match_count < MAX_MATCHES; i++) {        if (i % 100 == 0) {            printf("进度: %d/%d (%.1f%%)\r", i, count_b, (float)i/count_b*100);            fflush(stdout);        }
       int max_length = 0;        int best_match_pos = -1;
       // 在文本A中查找当前词的所有出现位置        for (int j = 0; j < count_a; j++) {            if (strcmp(words_b, words_a[j]) == 0) {                // 找到匹配的起始词,计算从这里开始的最大连续匹配长度                int length = calculate_max_match_length(words_a, count_a, j, words_b, count_b, i);
               // 如果这个匹配更长,更新最佳匹配                if (length > max_length) {                    max_length = length;                    best_match_pos = j;                }            }        }
       // 如果找到足够长的匹配且不与现有匹配重叠        if (max_length >= MIN_CHUNK_SIZE &&            !is_overlapping(matches, match_count, i, max_length)) {
           matches[match_count].start_a = best_match_pos;            matches[match_count].start_b = i;            matches[match_count].length = max_length;            matches[match_count].matched_text = build_match_text(words_b, i, max_length);
           match_count++;
           // 跳过已匹配的词,避免重复检测            i += max_length - 1;        }    }
   printf("\n检测完成!\n");    return match_count; }
// 按匹配长度排序(长的在前) void sort_matches_by_length(VariableMatch *matches, int count) {    for (int i = 0; i < count - 1; i++) {        for (int j = i + 1; j < count; j++) {            if (matches.length < matches[j].length) {                VariableMatch temp = matches;                matches = matches[j];                matches[j] = temp;            }        }    } }
// 从文件读取文本 int read_file(const char *filename, char *text) {    FILE *file = fopen(filename, "r");    if (file == NULL) {        printf("无法打开文件: %s\n", filename);        return 0;    }
   size_t bytes_read = fread(text, sizeof(char), MAX_TEXT_SIZE - 1, file);    text[bytes_read] = '\0';    fclose(file);    return 1; }
// 释放匹配结果的内存 void free_matches(VariableMatch *matches, int count) {    for (int i = 0; i < count; i++) {        if (matches.matched_text != NULL) {            free(matches.matched_text);        }    } }
int main() {    char *text_a = malloc(MAX_TEXT_SIZE);    char *text_b = malloc(MAX_TEXT_SIZE);    char (*words_a)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    char (*words_b)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    VariableMatch *matches = malloc(sizeof(VariableMatch) * MAX_MATCHES);
   if (!text_a || !text_b || !words_a || !words_b || !matches) {        printf("内存分配失败!\n");        return 1;    }
   printf("=== 变长度词块剽窃检测程序 ===\n\n");    printf("本程序使用贪心算法自动发现任意长度的相似词块\n");    printf("词块长度范围:%d 词 到 无限制\n\n", MIN_CHUNK_SIZE);
   // 输入方式选择    printf("请选择输入方式:\n");    printf("1. 从文件读取\n");    printf("2. 手动输入\n");    printf("请选择 (1-2): ");
   int choice;    scanf("%d", &choice);    getchar();
   if (choice == 1) {        char filename_a[200], filename_b[200];        printf("请输入文本A的文件名: ");        fgets(filename_a, sizeof(filename_a), stdin);        filename_a[strcspn(filename_a, "\n")] = 0;
       printf("请输入文本B的文件名: ");        fgets(filename_b, sizeof(filename_b), stdin);        filename_b[strcspn(filename_b, "\n")] = 0;
       if (!read_file(filename_a, text_a) || !read_file(filename_b, text_b)) {            goto cleanup;        }    } else {        printf("\n请输入文本A:\n");        fgets(text_a, MAX_TEXT_SIZE, stdin);
       printf("\n请输入文本B:\n");        fgets(text_b, MAX_TEXT_SIZE, stdin);    }
   printf("\n正在预处理文本...\n");
   // 规范化文本    normalize_text(text_a);    normalize_text(text_b);
   // 分割单词    int word_count_a = split_words(text_a, words_a);    int word_count_b = split_words(text_b, words_b);
   printf("文本A: %d 个词\n", word_count_a);    printf("文本B: %d 个词\n", word_count_b);
   if (word_count_a == 0 || word_count_b == 0) {        printf("错误: 文本为空或无有效词汇\n");        goto cleanup;    }
   // 执行变长度匹配    int match_count = find_variable_length_matches(words_a, word_count_a,                                                  words_b, word_count_b, matches);
   // 排序结果    sort_matches_by_length(matches, match_count);
   // 输出结果    printf("\n");    printf("=" * 60);    printf("\n检测结果\n");    printf("=" * 60);    printf("\n");
   if (match_count == 0) {        printf("未发现长度 >= %d 词的相似词块\n", MIN_CHUNK_SIZE);    } else {        printf("发现 %d 个变长度相似词块:\n\n", match_count);
       int total_matched_words = 0;        int length_stats[21] = {0}; // 统计不同长度的词块数量
       for (int i = 0; i < match_count; i++) {            printf("词块 %d:\n", i + 1);            printf(" 长度: %d 个词\n", matches.length);
           // 显示内容(如果太长则截断)            if (strlen(matches.matched_text) <= 150) {                printf(" 内容: \"%s\"\n", matches.matched_text);            } else {                printf(" 内容: \"%.147s...\"\n", matches.matched_text);            }
           printf(" 文本A位置: 第 %d-%d 个词\n",                   matches.start_a + 1,                   matches.start_a + matches.length);            printf(" 文本B位置: 第 %d-%d 个词\n",                   matches.start_b + 1,                   matches.start_b + matches.length);            printf("\n");
           total_matched_words += matches.length;
           // 统计长度分布            int len_category = matches.length <= 20 ? matches.length : 20;            length_stats[len_category]++;        }
       // 计算覆盖率        float coverage = (float)total_matched_words / word_count_b * 100;        printf("统计信息:\n");        printf(" 匹配词数: %d / %d\n", total_matched_words, word_count_b);        printf(" 覆盖率: %.2f%%\n", coverage);
       // 显示长度分布        printf("\n词块长度分布:\n");        for (int i = MIN_CHUNK_SIZE; i <= 20; i++) {            if (length_stats > 0) {                printf(" %d词块: %d个\n", i, length_stats);            }        }        if (length_stats[20] > 0) {            printf(" 20+词块: %d个\n", length_stats[20]);        }
       // 风险评估        printf("\n风险评估:\n");        if (coverage > 50) {            printf(" 🚨 高风险: 超过50%%的内容可能被剽窃\n");        } else if (coverage > 30) {            printf(" ⚠️ 中风险: 发现显著相似性\n");        } else if (coverage > 15) {            printf(" ⚠️ 低风险: 发现部分相似性\n");        } else {            printf(" ✅ 相似度较低\n");        }
       // 找出最长的匹配        if (match_count > 0) {            printf("\n最长匹配词块: %d 个词\n", matches[0].length);            if (matches[0].length > 20) {                printf(" ⚠️ 发现超长匹配,强烈建议人工审查\n");            }        }    }
cleanup:    free_matches(matches, match_count);    free(text_a);    free(text_b);    free(words_a);    free(words_b);    free(matches);
   return 0; }

// 找到匹配的起始词,计算从这里开始的最大连续匹配长度
这部分有错。 因为匹配的起始点,是动态的,是变化的,而不是固定的。
m
mozartyell
gooog 发表于 2025-05-29 23:12
// 找到匹配的起始词,计算从这里开始的最大连续匹配长度
这部分有错。 因为匹配的起始点,是动态的,是变化的,而不是固定的。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Each preprocessor directive must sit on its own line. */
#define MAX_TEXT_SIZE 50000   /* byte capacity of each input text buffer */
#define MAX_WORD_LEN 100      /* byte capacity of one stored word, NUL included */
#define MIN_CHUNK_SIZE 3      /* shortest run of words reported as a match */
#define MAX_MATCHES 2000      /* capacity of the matches array */
/* One matched chunk of words shared by text A and text B. */
typedef struct {
    int start_a;          /* start position in text A (word index) */
    int start_b;          /* start position in text B (word index) */
    int length;           /* chunk length in words */
    char *matched_text;   /* text of the matched words */
    double similarity;    /* similarity score */
} DynamicMatch;
/*
 * Normalize text in place: lower-case every alphanumeric character and
 * replace each run of other characters with a single space. Leading and
 * trailing separators are dropped, so the result is words joined by exactly
 * one space (or the empty string).
 *
 * text: NUL-terminated, writable string; the result is never longer than
 *       the input.
 *
 * Characters go through <ctype.h> as unsigned char, which the classification
 * functions require. Done in a single pass, so no scratch buffer is needed.
 */
void normalize_text(char *text) {
    size_t out = 0;

    for (size_t in = 0; text[in] != '\0'; in++) {
        unsigned char c = (unsigned char)text[in];
        if (isalnum(c)) {
            text[out++] = (char)tolower(c);
        } else if (out > 0 && text[out - 1] != ' ') {
            /* first separator after a word: emit one space, skip the rest */
            text[out++] = ' ';
        }
    }

    /* at most one trailing space can be left over */
    if (out > 0 && text[out - 1] == ' ') {
        out--;
    }
    text[out] = '\0';
}
// 分割文本为单词数组 int split_words(char *text, char words[][MAX_WORD_LEN]) {    int count = 0;    char *text_copy = malloc(strlen(text) + 1);    strcpy(text_copy, text);
   char *token = strtok(text_copy, " ");    while (token != NULL && count < MAX_TEXT_SIZE/10) {        if (strlen(token) > 0) {            strcpy(words[count], token);            count++;        }        token = strtok(NULL, " ");    }
   free(text_copy);    return count; }
// 核心算法:动态寻找任意位置的最长匹配 // 从文本A的pos_a和文本B的pos_b开始,计算最长连续匹配 int find_longest_match_from_position(char words_a[][MAX_WORD_LEN], int count_a, int pos_a,                                    char words_b[][MAX_WORD_LEN], int count_b, int pos_b) {    int length = 0;    while (pos_a + length < count_a &&           pos_b + length < count_b &&           strcmp(words_a[pos_a + length], words_b[pos_b + length]) == 0) {        length++;    }    return length; }
// 检查匹配是否与已有匹配重叠 int has_overlap(DynamicMatch *matches, int match_count, int start_a, int start_b, int length) {    for (int i = 0; i < match_count; i++) {        // 检查在文本A中的重叠        int existing_end_a = matches.start_a + matches.length - 1;        int new_end_a = start_a + length - 1;        int overlap_a = !(new_end_a < matches.start_a || start_a > existing_end_a);
       // 检查在文本B中的重叠        int existing_end_b = matches.start_b + matches.length - 1;        int new_end_b = start_b + length - 1;        int overlap_b = !(new_end_b < matches.start_b || start_b > existing_end_b);
       if (overlap_a && overlap_b) {            return 1; // 有重叠        }    }    return 0; // 无重叠 }
// 构建匹配文本 char* build_match_text(char words[][MAX_WORD_LEN], int start_pos, int length) {    int total_len = 0;    for (int i = 0; i < length; i++) {        total_len += strlen(words[start_pos + i]) + 1;    }
   char *result = malloc(total_len + 1);    result[0] = '\0';
   for (int i = 0; i < length; i++) {        if (i > 0) strcat(result, " ");        strcat(result, words[start_pos + i]);    }
   return result; }
// 真正的动态词块检测算法 int find_dynamic_matches(char words_a[][MAX_WORD_LEN], int count_a,                        char words_b[][MAX_WORD_LEN], int count_b,                        DynamicMatch *matches) {    int match_count = 0;
   printf("开始动态词块检测...\n");    printf("算法:遍历所有可能的起始位置组合\n");
   // 核心:遍历文本B的每个位置    for (int pos_b = 0; pos_b < count_b && match_count < MAX_MATCHES; pos_b++) {        if (pos_b % 50 == 0) {            printf("扫描进度: %d/%d (%.1f%%)\r", pos_b, count_b, (float)pos_b/count_b*100);            fflush(stdout);        }
       // 对于文本B的当前位置,遍历文本A的所有位置寻找匹配        for (int pos_a = 0; pos_a < count_a; pos_a++) {            // 计算从这两个位置开始的最长匹配            int match_length = find_longest_match_from_position(words_a, count_a, pos_a,                                                              words_b, count_b, pos_b);
           // 如果匹配长度足够且无重叠            if (match_length >= MIN_CHUNK_SIZE) {                if (!has_overlap(matches, match_count, pos_a, pos_b, match_length)) {                    // 记录这个匹配                    matches[match_count].start_a = pos_a;                    matches[match_count].start_b = pos_b;                    matches[match_count].length = match_length;                    matches[match_count].matched_text = build_match_text(words_b, pos_b, match_length);                    matches[match_count].similarity = 1.0; // 完全匹配
                   match_count++;
                   // 跳过这个匹配覆盖的位置,避免重复                    pos_b += match_length - 1;                    break; // 跳出内层循环,继续下一个pos_b                }            }        }    }
   printf("\n动态检测完成!\n");    return match_count; }
// 更高级的动态检测算法(考虑部分匹配) int find_advanced_dynamic_matches(char words_a[][MAX_WORD_LEN], int count_a,                                 char words_b[][MAX_WORD_LEN], int count_b,                                 DynamicMatch *matches, float similarity_threshold) {    int match_count = 0;
   printf("开始高级动态词块检测...\n");    printf("算法:支持部分匹配,相似度阈值=%.2f\n", similarity_threshold);
   // 使用滑动窗口检测不同长度的匹配    for (int window_size = MIN_CHUNK_SIZE; window_size <= 20; window_size++) {        printf("检测 %d 词窗口...\n", window_size);
       // 遍历文本B的所有可能窗口        for (int pos_b = 0; pos_b <= count_b - window_size; pos_b++) {            // 遍历文本A的所有可能窗口            for (int pos_a = 0; pos_a <= count_a - window_size; pos_a++) {                // 计算这两个窗口的相似度                int matched_words = 0;                for (int i = 0; i < window_size; i++) {                    if (strcmp(words_a[pos_a + i], words_b[pos_b + i]) == 0) {                        matched_words++;                    }                }
               float similarity = (float)matched_words / window_size;
               // 如果相似度足够高且无重叠                if (similarity >= similarity_threshold) {                    if (!has_overlap(matches, match_count, pos_a, pos_b, window_size)) {                        matches[match_count].start_a = pos_a;                        matches[match_count].start_b = pos_b;                        matches[match_count].length = window_size;                        matches[match_count].matched_text = build_match_text(words_b, pos_b, window_size);                        matches[match_count].similarity = similarity;
                       match_count++;                        if (match_count >= MAX_MATCHES) break;                    }                }            }            if (match_count >= MAX_MATCHES) break;        }        if (match_count >= MAX_MATCHES) break;    }
   printf("高级检测完成!\n");    return match_count; }
// 按长度和相似度排序 void sort_matches(DynamicMatch *matches, int count) {    for (int i = 0; i < count - 1; i++) {        for (int j = i + 1; j < count; j++) {            // 首先按长度排序,然后按相似度排序            int should_swap = 0;            if (matches.length < matches[j].length) {                should_swap = 1;            } else if (matches.length == matches[j].length &&                      matches.similarity < matches[j].similarity) {                should_swap = 1;            }
           if (should_swap) {                DynamicMatch temp = matches;                matches = matches[j];                matches[j] = temp;            }        }    } }
// 从文件读取文本 int read_file(const char *filename, char *text) {    FILE *file = fopen(filename, "r");    if (file == NULL) {        printf("无法打开文件: %s\n", filename);        return 0;    }
   size_t bytes_read = fread(text, sizeof(char), MAX_TEXT_SIZE - 1, file);    text[bytes_read] = '\0';    fclose(file);    return 1; }
// 释放内存 void free_matches(DynamicMatch *matches, int count) {    for (int i = 0; i < count; i++) {        if (matches.matched_text != NULL) {            free(matches.matched_text);        }    } }
int main() {    char *text_a = malloc(MAX_TEXT_SIZE);    char *text_b = malloc(MAX_TEXT_SIZE);    char (*words_a)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    char (*words_b)[MAX_WORD_LEN] = malloc(sizeof(char[MAX_TEXT_SIZE/5][MAX_WORD_LEN]));    DynamicMatch *matches = malloc(sizeof(DynamicMatch) * MAX_MATCHES);
   if (!text_a || !text_b || !words_a || !words_b || !matches) {        printf("内存分配失败!\n");        return 1;    }
   printf("=== 动态起始点词块剽窃检测程序 ===\n\n");    printf("特性:\n");    printf("• 动态发现任意位置的相似片段\n");    printf("• 不依赖固定起始词\n");    printf("• 支持完全匹配和部分匹配\n");    printf("• 真正的变长度词块检测\n\n");
   // 输入方式选择    printf("请选择输入方式:\n");    printf("1. 从文件读取\n");    printf("2. 手动输入\n");    printf("请选择 (1-2): ");
   int choice;    scanf("%d", &choice);    getchar();
   if (choice == 1) {        char filename_a[200], filename_b[200];        printf("请输入文本A的文件名: ");        fgets(filename_a, sizeof(filename_a), stdin);        filename_a[strcspn(filename_a, "\n")] = 0;
       printf("请输入文本B的文件名: ");        fgets(filename_b, sizeof(filename_b), stdin);        filename_b[strcspn(filename_b, "\n")] = 0;
       if (!read_file(filename_a, text_a) || !read_file(filename_b, text_b)) {            goto cleanup;        }    } else {        printf("\n请输入文本A:\n");        fgets(text_a, MAX_TEXT_SIZE, stdin);
       printf("\n请输入文本B:\n");        fgets(text_b, MAX_TEXT_SIZE, stdin);    }
   // 选择检测模式    printf("\n请选择检测模式:\n");    printf("1. 严格模式 (只检测完全匹配)\n");    printf("2. 宽松模式 (检测部分匹配,相似度>=0.8)\n");    printf("3. 自定义相似度阈值\n");    printf("请选择 (1-3): ");
   int mode;    scanf("%d", &mode);
   printf("\n正在预处理文本...\n");
   // 规范化文本    normalize_text(text_a);    normalize_text(text_b);
   // 分割单词    int word_count_a = split_words(text_a, words_a);    int word_count_b = split_words(text_b, words_b);
   printf("文本A: %d 个词\n", word_count_a);    printf("文本B: %d 个词\n", word_count_b);
   if (word_count_a == 0 || word_count_b == 0) {        printf("错误: 文本为空或无有效词汇\n");        goto cleanup;    }
   // 执行动态匹配    int match_count = 0;
   if (mode == 1) {        // 严格模式:只检测完全匹配        match_count = find_dynamic_matches(words_a, word_count_a, words_b, word_count_b, matches);    } else {        // 宽松模式或自定义模式        float threshold = 0.8;        if (mode == 3) {            printf("请输入相似度阈值 (0.0-1.0): ");            scanf("%f", &threshold);            if (threshold < 0.0 || threshold > 1.0) threshold = 0.8;        }        match_count = find_advanced_dynamic_matches(words_a, word_count_a, words_b, word_count_b,                                                   matches, threshold);    }
   // 排序结果    sort_matches(matches, match_count);
   // 输出结果    printf("\n");    for (int i = 0; i < 60; i++) printf("=");    printf("\n动态检测结果\n");    for (int i = 0; i < 60; i++) printf("=");    printf("\n");
   if (match_count == 0) {        printf("未发现符合条件的相似词块\n");    } else {        printf("发现 %d 个动态匹配词块:\n\n", match_count);
       int total_matched_words = 0;
       for (int i = 0; i < match_count; i++) {            printf("词块 %d:\n", i + 1);            printf(" 长度: %d 个词\n", matches.length);            printf(" 相似度: %.1f%%\n", matches.similarity * 100);
           // 显示内容            if (strlen(matches.matched_text) <= 120) {                printf(" 内容: \"%s\"\n", matches.matched_text);            } else {                printf(" 内容: \"%.117s...\"\n", matches.matched_text);            }
           printf(" 文本A位置: 第 %d-%d 个词\n",                   matches.start_a + 1,                   matches.start_a + matches.length);            printf(" 文本B位置: 第 %d-%d 个词\n",                   matches.start_b + 1,                   matches.start_b + matches.length);            printf("\n");
           total_matched_words += matches.length;        }
       // 统计分析        float coverage = (float)total_matched_words / word_count_b * 100;        printf("统计分析:\n");        printf(" 总匹配词数: %d / %d\n", total_matched_words, word_count_b);        printf(" 覆盖率: %.2f%%\n", coverage);
       if (match_count > 0) {            printf(" 最长匹配: %d 词\n", matches[0].length);            printf(" 平均长度: %.1f 词\n", (float)total_matched_words / match_count);        }
       // 风险评估        printf("\n风险评估:\n");        if (coverage > 60) {            printf(" 🚨 极高风险: 大部分内容可能被剽窃\n");        } else if (coverage > 40) {            printf(" 🚨 高风险: 显著剽窃迹象\n");        } else if (coverage > 20) {            printf(" ⚠️ 中等风险: 发现明显相似性\n");        } else if (coverage > 10) {            printf(" ⚠️ 低风险: 发现部分相似性\n");        } else {            printf(" ✅ 相似度较低\n");        }    }
cleanup:    free_matches(matches, match_count);    free(text_a);    free(text_b);    free(words_a);    free(words_b);    free(matches);
   return 0; }