文学研究助手(设计性实验)
作者:互联网
文学研究助手(设计性实验)
1. 需求分析
需求:
英文小说存于一个文本文件中。待统计的词汇集合要一次输入完毕,即统计工作必须在程序的一次运行之后就全部完成。程序的输出结果是每个词的出现次数和出现位置的行号,格式自行设计。
分析:
(1) 输入的形式和输入值的范围:将英语文章按屏幕提示放入文件中,输入查找个数为int型数字,输入英文单词为字符串形式。
(2) 输出的形式:对每个待查单词,输出它在各行的出现次数(附行号)以及出现的总次数(均为int型数字),结果写入文件“统计词汇的结果.txt”中。
(3) 程序所能达到的功能:统计每个要查找的单词在文件中出现的总次数,以及它在各行的出现次数和所在行号。
(4) 测试数据:
· 以程序源代码作为英语文章放入文件中
· 查找3个单词:include while break
2. 概要设计
- 为了实现程序功能,需要定义串的抽象数据类型。
ADT String {
数据对象:$D = \{\, a_i \mid a_i \in \text{CharacterSet},\ i = 1, 2, \dots, n,\ n \ge 0 \,\}$
数据关系:$R_1 = \{\, \langle a_{i-1}, a_i \rangle \mid a_{i-1}, a_i \in D,\ i = 2, \dots, n \,\}$
/* One character of a line; nodes are chained into a singly linked list. */
typedef struct WORD{
    char letter;
    struct WORD *next;
}word; // character node

/* A line of text: pointers to the first and last node of its character list. */
typedef struct{
    word *first;
    word *last;
}row; // per-line list header

/*
 * A line together with the link to the following line.
 * The link must name this struct's own tag (Row); writing `struct ROW *`
 * here would declare a pointer to a different, never-completed type.
 */
typedef struct Row{
    row r;
    struct Row *next;
}ROW; // line node

/* The whole article: pointers to its first and last line. */
typedef struct{
    ROW *first;
    ROW *last;
}article; // article structure
基本操作:
StrAssign (&T, chars)
初始条件:chars 是字符串常量。
操作结果:把 chars 赋为 T 的值。
StrCopy (&T, S)
初始条件:串 S 已存在。
操作结果:由串 S 复制得到串 T。
DestroyString (&S)
初始条件:串 S 已存在。
操作结果:串 S 被销毁。
StrEmpty (S)
初始条件:串S已存在。
操作结果:若 S 为空串,则返回 TRUE,否则返回 FALSE。
StrLength (S)
初始条件:串 S 已存在。
操作结果:返回 S 的元素个数,称为串的长度。
StrCompare (S, T)
初始条件:串 S 和 T 已存在。
操作结果:若S > T,则返回值> 0;
若S = T,则返回值=0;
若S < T,则返回值<0。
Concat (&T, S1, S2)
初始条件:串 S1 和 S2 已存在。
操作结果:用 T 返回由 S1 和 S2联接而成的新串。
SubString (&Sub, S, pos, len)
初始条件:串S 已存在,1<=pos<=StrLength(S) 且 0<=len<=StrLength(S)-pos+1。
操作结果: Sub 返回串 S 的第 pos 个字符起长度为 len 的子串。
Index (S, T, pos)
初始条件:串S和T已存在,T是非空串,1≤pos≤StrLength(S)。
操作结果:若主串 S 中存在和串 T 值相同的子串, 则返回它在主串 S
中第pos个字符之后第一次出现的位置;否则函数值为0。
Replace (&S, T, V)
初始条件:串S, T和 V 均已存在,且 T 是非空串。
操作结果:用 V 替换主串 S 中出现的所有与(模式串)T相等的不重叠的子串。
StrInsert (&S, pos, T)
初始条件:串S和T已存在,1≤pos≤StrLength(S)+1。
操作结果:在串S的第pos个字符之前插入串T。
StrDelete (&S, pos, len)
初始条件:串S已存在,1≤pos≤StrLength(S)-len+1。
操作结果:从串S中删除第pos个字符起长度为len的子串。
ClearString (&S)
初始条件:串S已存在。
操作结果:将S清为空串。
}ADT String
3.具体代码
- 文章结构链表.h:
/*
 * One character of a line; nodes are chained into a singly linked list.
 * The first node of every line is a head node that carries no text: the
 * code that builds lines stores the line's number in its `letter` field.
 */
typedef struct WORD{
    char letter;        // the character (or the line number, in a head node)
    struct WORD *next;  // following character, NULL at the end of the line
}word; // character node

/* A line of text: pointers to the head node and to the last node of the list. */
typedef struct{
    word *first;  // head node of the line
    word *last;   // last node, where new characters are appended
}row; // per-line list header

/*
 * A line together with the link to the following line.
 * The link must name this struct's own tag (Row); writing `struct ROW *`
 * here would declare a pointer to a different, never-completed type, which
 * makes every `ROW *p = x->next` an incompatible-pointer assignment.
 */
typedef struct Row{
    row r;             // the characters of this line
    struct Row *next;  // following line, NULL for the last line
}ROW; // line node

/* The whole article: pointers to its first and last line. */
typedef struct{
    ROW *first;  // first line
    ROW *last;   // last line, where new lines are appended
}article; // article structure
/*
 * Initialise *a as an article that contains a single, empty line.
 *
 * Allocates the first line node and that line's head node. The head node
 * carries no text; its `letter` field is set to 1, the number of the line.
 * On allocation failure a message is printed and the program exits with
 * status -1, the same status main uses for its own fatal errors.
 */
void create_article(article *a)
{
    ROW *line = malloc(sizeof *line);
    word *head = malloc(sizeof *head);

    if (line == NULL || head == NULL)
    {
        fprintf(stderr, "内存分配失败\n");
        exit(-1);
    }

    head->letter = 1;   // number of the first line
    head->next = NULL;  // no characters yet

    line->r.first = head;
    line->r.last = head;  // head and tail coincide while the line is empty
    line->next = NULL;

    a->first = line;
    a->last = line;       // a one-line article: first and last are the same
}
/*
 * Append a new, empty line to the end of article *a.
 *
 * The new line gets its own head node, whose `letter` field is set to the
 * previous last line's number plus one. The head's `next` is set to NULL
 * explicitly: without that, a line that never receives a character (a blank
 * line, or the empty line after a trailing newline) would expose an
 * indeterminate pointer to anyone walking the line.
 *
 * NOTE: the line number lives in a `char`, so it is only meaningful while
 * it fits in that type.
 *
 * On allocation failure a message is printed and the program exits with
 * status -1.
 */
void add_row(article *a)
{
    ROW *line = malloc(sizeof *line);
    word *head = malloc(sizeof *head);

    if (line == NULL || head == NULL)
    {
        fprintf(stderr, "内存分配失败\n");
        exit(-1);
    }

    head->letter = a->last->r.first->letter + 1;  // next line number
    head->next = NULL;                            // the line starts empty

    line->r.first = head;
    line->r.last = head;
    line->next = NULL;

    a->last->next = line;  // link after the current last line
    a->last = line;        // and make it the new last line
}
/*
 * Append the character ch to the end of line *r.
 *
 * r->last must point to an existing node (the line's head node at least).
 * On allocation failure a message is printed and the program exits with
 * status -1.
 */
void add_word(row *r,char ch)
{
    word *node = malloc(sizeof *node);

    if (node == NULL)
    {
        fprintf(stderr, "内存分配失败\n");
        exit(-1);
    }

    node->letter = ch;
    node->next = NULL;

    r->last->next = node;  // tail insertion
    r->last = node;        // the new node becomes the tail
}
- KMP算法匹配.h:
/*
 * Build the KMP failure table for pattern a.
 *
 * For every index i of a, b[i] receives (L - 1), where L is the length of
 * the longest proper prefix of a[0..i] that is also a suffix of a[0..i];
 * b[i] is therefore -1 when no such prefix exists. Equivalently, b[i] is the
 * index of the last character of that prefix.
 *
 * a : NUL-terminated pattern.
 * b : output array with room for at least strlen(a) ints.
 *
 * An empty pattern leaves b untouched. The table is built in linear time by
 * reusing the entries already computed, without temporary copies of the
 * pattern.
 */
void pretreatment(char a[],int b[])
{
    size_t len = strlen(a);
    if (len == 0)
    {
        return;  // nothing to describe, and b[-1] must not be written
    }

    int border = -1;  // index of the last char of the current border, -1 if none
    b[0] = -1;        // a single character has no proper border

    for (size_t i = 1; i < len; i++)
    {
        // Shrink the border until it can be extended by a[i] or is empty.
        while (border >= 0 && a[border + 1] != a[i])
        {
            border = b[border];
        }
        if (a[border + 1] == a[i])
        {
            border++;
        }
        b[i] = border;
    }
}
/*
 * Count the occurrences of pattern a in every line of art and write a
 * report to w.
 *
 * a   : NUL-terminated pattern to look for.
 * b   : failure table for a, where b[i] is the length of the longest proper
 *       prefix of a[0..i] that is also its suffix, minus one (-1 if none).
 * art : the article to scan; each line's first node is a head node that
 *       holds no text, so scanning starts at the node after it.
 * w   : output stream for the report.
 *
 * For each line containing at least one match, one "line N has K" record is
 * written; a final record gives the total. Lines are numbered from 1 in
 * list order, using a local int counter.
 *
 * Matching is by substring (no word-boundary check), restarts at the
 * beginning of every line, and restarts from scratch after each complete
 * match, so counted matches never overlap. An empty pattern matches nothing.
 */
void found(char a[],int b[],article art,FILE *w)
{
    const size_t len = strlen(a);  // computed once, not once per character
    int number = 0;                // matches in the whole article
    int line_no = 0;               // 1-based number of the current line

    for (ROW *q = art.first; q != NULL; q = q->next)
    {
        size_t n = 0;        // pattern characters matched so far
        int row_number = 0;  // matches in this line
        line_no++;

        // Skip the head node; with an empty pattern there is nothing to scan.
        word *p = (len > 0) ? q->r.first->next : NULL;
        for (; p != NULL; p = p->next)
        {
            // On a mismatch fall back to the next shorter border and
            // re-compare the SAME text character, until it fits or no
            // prefix is left.
            while (n > 0 && p->letter != a[n])
            {
                n = (size_t)(b[n - 1] + 1);
            }
            if (p->letter == a[n])
            {
                n++;
            }
            if (n == len)  // whole pattern matched
            {
                number++;
                row_number++;
                n = 0;     // start over: matches are non-overlapping
            }
        }

        if (row_number > 0)
        {
            fprintf(w,"第%d行有%d个\n",line_no,row_number);
        }
    }
    fprintf(w,"共计有%d个。\n",number);
}
- 文学研究助手
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"文章结构链表.h"
#include"KMP算法匹配.h"
#define size 100 // element count of the search-word buffer and of its failure table in main
/*
 * Entry point.
 *
 * 1. Waits for ENTER, then loads "英文小说.txt" into a linked article
 *    structure, one list of characters per line.
 * 2. Asks how many words to look up, then reads that many words.
 * 3. For each word builds its failure table and writes the per-line and
 *    total counts to "统计词汇的结果.txt".
 *
 * Returns 0 on success; exits with status -1 when a file cannot be opened,
 * the word count is not a number, or the result file cannot be written.
 */
int main(void)
{
    FILE *r,*w;
    printf("请将文章放入文件‘英文小说.txt’中(放好请按ENTER键继续)");
    getchar();
    if((r = fopen("英文小说.txt","rt")) == NULL)
    {
        perror("英文小说.txt");
        exit(-1);
    }
    if((w = fopen("统计词汇的结果.txt","w")) == NULL)
    {
        perror("统计词汇的结果.txt");
        fclose(r);
        exit(-1);
    }

    article english;           // the article being analysed
    create_article(&english);

    // Every newline starts a new line node; any other character is appended
    // to the current last line.
    int c;
    while((c = fgetc(r)) != EOF)
    {
        if(c != '\n')
        {
            add_word(&(english.last->r),(char)c);
        }
        else
        {
            add_row(&english);
        }
    }
    fclose(r);                 // the input is fully loaded

    printf("请输入要统计单词的个数:");
    int number;
    if(scanf("%d",&number) != 1)  // do not loop on an uninitialised count
    {
        fprintf(stderr,"输入的个数无效\n");
        fclose(w);
        exit(-1);
    }

    char search[size];         // the word being looked up
    int result[size];          // its failure table

    // Bound %s by the buffer capacity so an over-long word cannot overflow
    // `search`; the excess characters are read as the next word instead.
    char format[16];
    snprintf(format,sizeof format,"%%%ds",(int)sizeof(search) - 1);

    for(int n = 0;n < number;n++)
    {
        memset(search,0,sizeof(search));
        memset(result,0,sizeof(result));
        printf("请输入第%d个单词:",n+1);
        if(scanf(format,search) != 1)  // input ended early
        {
            break;
        }
        pretreatment(search,result);
        fprintf(w,"第%d个单词%s统计结果:\n",n+1,search);
        found(search,result,english,w);
    }

    if(fclose(w) != 0)         // closing flushes; a failure means lost output
    {
        perror("统计词汇的结果.txt");
        exit(-1);
    }
    printf("查找结果放在文件“统计词汇的结果.txt”中");
    return 0;
}
4.相关文件
- 英文小说.txt:
China is stepping up efforts to strengthen data security, especially that to be provided overseas, by legislation to better regulate data processing and safeguard State and personal information security.
The Cyberspace Administration of China, the country’s top internet watchdog, unveiled on Wednesday a draft regulation on managing data related to automobiles, to intensify the protection of personal and other crucial data generated on the Chinese mainland.
The draft applies to operators that design, produce, sell, maintain the service of and manage automobiles on the mainland, when they collect, analyze, store, transmit, inquire about, use and delete personal information and key data.
It stipulates that individuals must agree to the collection of their personal data, and personal information or key data should be stored on the Chinese mainland. Data that is to be provided offshore must receive and pass data exit safety assessments by the nation’s cyberspace authorities.
When operators offer personal information or key data abroad, they will need to take effective measures to clarify and supervise that receivers’ use of the data is in accordance with the purpose, scope and manner agreed to by the two sides, in order to ensure data security, according to the draft regulation.
The administration released the full text of the draft regulation online for public advice before June 11. On Wednesday night, US car manufacturer Tesla said in a statement regarding the draft regulation on Sina Weibo, China’s equivalent of Twitter, that it supports and answers the call to have a more regulated industry, and “the public is welcome to offer suggestions to the authorities”.
The latest draft follows an earlier draft law on data security that was submitted to the Standing Committee of the 13th National People’s Congress, China’s top legislature, for a second review in late April. The draft law highlighted the security of outbound data while aiming to promote the development of the data sector.
The draft law specifies that those who privately provide domestic data to judicial or law enforcement agencies overseas may face a fine of up to 1 million yuan ($155,000), and it tightens management of operators trying to take data collected or generated on the mainland to overseas areas.
Besides operators of critical information infrastructure, more data processors, including those of enterprises and data analysis centers, will also face stricter management if they produce or collect data on the mainland, according to the new draft law.
“In other words, the scope of outbound data security management in the latest draft law has been expanded,” said Zhang Tao, a lawyer at Beijing Huaxun Law Offices who specializes in the sector.
“The move is necessary, as the aim of the legislation is to ensure data activities are safe in each step,” he said, adding that security is the foundation of data use and development.
How to keep a balance between ensuring data security while promoting the sound development of the sector has become a hot topic in China, as the country has seen a rapid growth of big data and data flow in recent years.
Liu Yaohua, a researcher at the China Academy of Information and Communications Technology, published an article in China Economic Weekly in April, in which he said China urgently needed to improve rules on cross-border data flow, as the data sector is booming and many new challenges have surfaced.
The article cited a survey by the Washington-based Brookings Institution, which found that between 2009 and 2018, cross-border data flow contributed about 10 percent to global GDP growth, and the contribution could exceed $11 trillion by 2025. It also noted that the United States, the European Union and other countries or regions have made different cross-border data flow policies based on their own security and economic growth situations.
Liu said a series of high-level policies and rules on cross-border data flow have been implemented at the national, industrial and enterprise levels in China since the Chinese Cybersecurity Law took effect in 2017. He also said China should ensure that data will flow in a legal and orderly manner.
Li Guangqian, a researcher at the Development Research Center of the State Council, said that at present, countries have different attitudes toward cross-border data flow, but he believes that protecting personal information and key data for a country is as important as promoting the development of the data industry.
“China should deal with the cross-border data flow from the perspective of compliance, and formulate relevant rules in line with our own national conditions,” he was quoted as saying by the Financial News in April.
标签:word,next,文学,助手,实验,last,ROW,data,first 来源: https://blog.csdn.net/weixin_51877075/article/details/120614338