Imperial Titles in the Theodosian Code

Frequency of Imperial Titles in the Theodosian Code

titlesincth

 

Frequency of Those Words (not just as titles) in the Theodosian Code

nontitle

 

Frequency of Clementia as Imperial Title, by Reign

byreign

 

Code

#THEODOSIAN CODE

library(stringr) # str_replace_all()

# Read the raw text of the Theodosian Code, one vector element per line.
CTh.scan <- scan("~/Education/Emory/Coursework/Digital Humanities Methods/Project/Theodosian Code Raw Text.txt",
                 what = "character", sep = "\n")

# Strip punctuation and lower-case every line so that later searches can use
# plain lower-case stems.
CTh.lines <- tolower(str_replace_all(string = CTh.scan, pattern = "[:punct:]", replacement = ""))

# Every line containing "book" is treated as a Book heading.
# NOTE(review): this also matches any body line that merely contains "book";
# confirm against the raw text that only the real headings are hit.
book.headings <- grep("book", CTh.lines)
if (length(book.headings) == 0) {
  stop("No Book headings found in the Theodosian Code text", call. = FALSE)
}

# A Book runs from the line after its heading to the line before the next
# heading; the last Book runs to the end of the text. Negative indexing
# (rather than 2:length(x)) stays correct when there is a single heading.
start.lines <- book.headings + 1
end.lines <- c(book.headings[-1] - 1, length(CTh.lines))

# Join the lines from..to into one string; an empty span (heading directly
# followed by another heading or by the end of the text) yields "".
collapse_lines <- function(from, to) {
  if (from > to) {
    return("")
  }
  paste(CTh.lines[from:to], collapse = " ")
}

CTh.df <- data.frame(start = start.lines, end = end.lines, stringsAsFactors = FALSE)
CTh.df$text <- vapply(
  seq_len(nrow(CTh.df)),
  function(i) collapse_lines(CTh.df$start[i], CTh.df$end[i]),
  character(1)
)

CTh.df$Book <- seq_len(nrow(CTh.df))

#String Extracts of Imperial Titles

library(stringr) # str_extract_all()

# Two regex alternatives for one possessive/stem pair: possessive before the
# stem and stem before the possessive, each with up to 80 characters of
# context before, between and after the two words.
pair_window <- function(possessive, stem, gap = ".{0,80}") {
  c(
    paste0(gap, possessive, gap, stem, gap),
    paste0(gap, stem, gap, possessive, gap)
  )
}

# Full search pattern for a title: the alternatives of every possessive/stem
# pair joined with "|". `stems` is recycled against `possessives`.
title_pattern <- function(possessives, stems) {
  paste(unlist(Map(pair_window, possessives, stems), use.names = FALSE), collapse = "|")
}

# Only the uninflected part of each noun is searched, so one stem covers all
# case endings.
title.stems <- c(
  AETERNITAS = "aeternita",
  CLEMENTIA = "clementia",
  LENITAS = "lenita",
  LENITUDO = "lenitud",
  MAIESTAS = "maiesta",
  MANSUETUDO = "mansuetud",
  MODERATIO = "moderatio",
  PERENNITAS = "perennita",
  PIETAS = "pieta",
  SCIENTIA = "scientia",
  SERENITAS = "serenita",
  TRANQUILLITAS = "tranquillita"
)

title.patterns <- vapply(
  title.stems,
  function(stem) title_pattern(c("nostra", "mea"), stem),
  character(1)
)

# NUMEN is neuter and changes its stem (numen / numin-), so it needs its own
# possessive/stem pairs.
title.patterns[["NUMEN"]] <- title_pattern(
  c("nostrum", "nostr", "meum", "me"),
  c("numen", "numin", "numen", "numin")
)
title.patterns <- title.patterns[order(names(title.patterns))]

# One list entry per title; each entry holds the matches per row of CTh.df.
# NOTE(review): short stems such as "mea"/"me" also match inside longer words,
# so every hit still has to be checked by reading it in context.
title.extracts <- lapply(
  title.patterns,
  function(pattern) str_extract_all(string = CTh.df$text, pattern = pattern)
)
title.extracts

#Imperial Title Sums

library(ggplot2)

# Number of uses of each word as an imperial title, entered by hand.
title.counts <- c(
  Aeternitas = 2,
  Clementia = 93,
  Lenitas = 2,
  Lenitudo = 2,
  Maiestas = 12,
  Mansuetudo = 59,
  Moderatio = 2,
  Numen = 27,
  Perennitas = 12,
  Pietas = 9,
  Scientia = 32,
  Serenitas = 57,
  Tranquillitas = 10
)

#Imperial Title Sum Graph

Frequency <- unname(title.counts)
Title <- names(title.counts)
sum.df <- cbind.data.frame(Title, Frequency)
# Order the factor levels by frequency so the bars are drawn sorted.
sum.df$Title <- factor(sum.df$Title, levels = sum.df$Title[order(sum.df$Frequency)])

# Both aesthetics belong inside aes(); previously y=Frequency sat outside it
# and was passed to ggplot() instead of being mapped.
ggplot(data = sum.df, aes(x = Title, y = Frequency)) +
  geom_bar(stat = "identity") +
  coord_flip() #Word Total Graph

#Non-Title Frequencies

library(stringr) # str_count()
library(ggplot2)

# Uninflected stem searched for each word; Numen needs both of its stems.
# NOTE(review): these are substring matches, so compounds containing a stem
# (for example a prefixed form of "scientia" or "pieta") are counted as well.
word.stems <- c(
  Aeternitas = "aeternita",
  Clementia = "clementia",
  Lenitas = "lenita",
  Lenitudo = "lenitud",
  Maiestas = "maiesta",
  Mansuetudo = "mansuetud",
  Moderatio = "moderatio",
  Numen = "numen|numin",
  Perennitas = "perennita",
  Pietas = "pieta",
  Scientia = "scientia",
  Serenitas = "serenita",
  Tranquillitas = "tranquillita"
)

# Total number of matches of one stem across all rows of CTh.df.
count_stem <- function(stem) {
  sum(str_count(CTh.df$text, stem), na.rm = TRUE)
}

Frequency <- unname(vapply(word.stems, count_stem, numeric(1)))
Title <- names(word.stems)
sum.df <- cbind.data.frame(Title, Frequency)
# Order the factor levels by frequency so the bars are drawn sorted.
sum.df$Title <- factor(sum.df$Title, levels = sum.df$Title[order(sum.df$Frequency)])

# Both aesthetics belong inside aes(); previously y=Frequency sat outside it.
ggplot(data = sum.df, aes(x = Title, y = Frequency)) +
  geom_bar(stat = "identity") +
  coord_flip() #Word Total Graph

#Title Frequency By Reign

library(ggplot2)

# Counts per emperor, entered by hand; labels carry the regnal years.
reign.counts <- c(
  "Constantine (306-337)" = 9,
  "Constantius (337-361)" = 5,
  "Valentinian I (364-375)" = 5,
  "Valens (364-378)" = 3,
  "Gratian (375-383)" = 1,
  "Valentinian II (375-392)" = 1,
  "Theodosius I (379-395)" = 7,
  "Honorius (395-423)" = 21,
  "Arcadius (395-408)" = 9,
  "Theodosius II (408-450)" = 23
)

Frequency <- unname(reign.counts)
Title <- names(reign.counts)
sum.df <- cbind.data.frame(Title, Frequency)
# Order the factor levels by frequency so the bars are drawn sorted.
sum.df$Title <- factor(sum.df$Title, levels = sum.df$Title[order(sum.df$Frequency)])

# Both aesthetics belong inside aes(); previously y=Frequency sat outside it.
# The Title column holds emperors here, hence the relabelled axis.
ggplot(data = sum.df, aes(x = Title, y = Frequency)) +
  geom_bar(stat = "identity") +
  labs(x = "Emperor") +
  coord_flip() #Word Total Graph

Imperial Titles in Late Roman Documents

Sorry for the delay on my blog post! I’ve finally managed to figure out the coding to search for all inflections of the various nostra/mea epithets in Latin documents. I was having trouble using .*? to account for varying numbers of characters between nostra/mea and its accompanying noun (e.g. nostra clementia), as R was, despite the “?”, still being far too greedy. str_locate_all showed that it was pairing nostra’s and titles that were thousands of characters apart!

My solution has been to ask R to search for combinations of nostra/mea and the accompanying noun with anywhere from 0 to 80 characters in between. Furthermore, I’ve simplified my code by only searching for the parts of these words that don’t inflect. So, for example, I wrote:

str_extract_all(string = CTh.df$text, pattern = “nostra.{0,80}clementia.{0,80}|clementia.{0,80}nostra.{0,80}|mea.{0,80}clementia.{0,80}|clementia.{0,80}mea.{0,80}”) #CLEMENTIA

This accounts for all inflections; it turns up nostra/mea clementia, nostrae/meae clementiae, and nostram/meam clementiam. I did this for all of the imperial epithets that I have identified within the Theodosian Code. I then used those results to locate and read each instance in the Latin text, both so as to confirm their use as imperial epithets within their respective contexts, and so as to record their exact location within the Code. It’s been time consuming, but very rewarding. I now have complete and accurate results for their frequency within the Code:

rplot

Now that I have an effective formula down, I will run through the rest of my documents this week: the main ones are the Code of Justinian, Symmachus’ Relationes to the emperors, and a series of Latin Panegyrics. I hope to have a few of these done before class on Thursday; I’ll update this post with those results.

My code

#THEODOSIAN CODE

library(stringr) # str_replace_all()

# Read the raw text of the Theodosian Code, one vector element per line.
CTh.scan <- scan("~/Education/Emory/Coursework/Digital Humanities Methods/Project/Theodosian Code Raw Text.txt",
                 what = "character", sep = "\n")

# Strip punctuation and lower-case every line so that later searches can use
# plain lower-case stems.
CTh.lines <- tolower(str_replace_all(string = CTh.scan, pattern = "[:punct:]", replacement = ""))

# Every line containing "book" is treated as a Book heading.
# NOTE(review): this also matches any body line that merely contains "book";
# confirm against the raw text that only the real headings are hit.
book.headings <- grep("book", CTh.lines)
if (length(book.headings) == 0) {
  stop("No Book headings found in the Theodosian Code text", call. = FALSE)
}

# A Book runs from the line after its heading to the line before the next
# heading; the last Book runs to the end of the text. Negative indexing
# (rather than 2:length(x)) stays correct when there is a single heading.
start.lines <- book.headings + 1
end.lines <- c(book.headings[-1] - 1, length(CTh.lines))

# Join the lines from..to into one string; an empty span yields "".
collapse_lines <- function(from, to) {
  if (from > to) {
    return("")
  }
  paste(CTh.lines[from:to], collapse = " ")
}

CTh.df <- data.frame(start = start.lines, end = end.lines, stringsAsFactors = FALSE)
CTh.df$text <- vapply(
  seq_len(nrow(CTh.df)),
  function(i) collapse_lines(CTh.df$start[i], CTh.df$end[i]),
  character(1)
)

CTh.df$Book <- seq_len(nrow(CTh.df))

#String Extracts of Imperial Titles

library(stringr) # str_extract_all()

# Two regex alternatives for one possessive/stem pair: possessive before the
# stem and stem before the possessive, with up to 80 characters between the
# two words and up to 80 characters of trailing context.
pair_window <- function(possessive, stem, gap = ".{0,80}") {
  c(
    paste0(possessive, gap, stem, gap),
    paste0(stem, gap, possessive, gap)
  )
}

# Full search pattern for a title: the alternatives of every possessive/stem
# pair joined with "|". `stems` is recycled against `possessives`.
title_pattern <- function(possessives, stems) {
  paste(unlist(Map(pair_window, possessives, stems), use.names = FALSE), collapse = "|")
}

# Only the uninflected part of each noun is searched, so one stem covers all
# case endings.
title.stems <- c(
  AETERNITAS = "aeternita",
  CLEMENTIA = "clementia",
  LENITAS = "lenita",
  LENITUDO = "lenitud",
  MAIESTAS = "maiesta",
  MANSUETUDO = "mansuetud",
  MODERATIO = "moderatio",
  PERENNITAS = "perennita",
  PIETAS = "pieta",
  SCIENTIA = "scientia",
  SERENITAS = "serenita",
  TRANQUILLITAS = "tranquillita"
)

title.patterns <- vapply(
  title.stems,
  function(stem) title_pattern(c("nostra", "mea"), stem),
  character(1)
)

# NUMEN is neuter and changes its stem (numen / numin-), so it needs its own
# possessive/stem pairs.
title.patterns[["NUMEN"]] <- title_pattern(
  c("nostrum", "nostr", "meum", "me"),
  c("numen", "numin", "numen", "numin")
)
title.patterns <- title.patterns[order(names(title.patterns))]

# One list entry per title; each entry holds the matches per row of CTh.df.
# NOTE(review): short stems such as "mea"/"me" also match inside longer words,
# so every hit still has to be checked by reading it in context.
title.extracts <- lapply(
  title.patterns,
  function(pattern) str_extract_all(string = CTh.df$text, pattern = pattern)
)
title.extracts

#Imperial Title Sums

library(ggplot2)

# Number of uses of each word as an imperial title, entered by hand.
# Perennitas was defined here but left out of the plotted vectors; it is now
# included so the graph shows every counted title.
title.counts <- c(
  Clementia = 93,
  Mansuetudo = 59,
  Serenitas = 57,
  Scientia = 32,
  Numen = 27,
  Maiestas = 12,
  Perennitas = 12,
  Tranquillitas = 10,
  Pietas = 9,
  Aeternitas = 2,
  Lenitas = 2,
  Lenitudo = 2,
  Moderatio = 2
)

#Imperial Title Sum Graph

Frequency <- unname(title.counts)
Title <- names(title.counts)
sum.df <- cbind.data.frame(Title, Frequency)
# Order the factor levels by frequency so the bars are drawn sorted.
sum.df$Title <- factor(sum.df$Title, levels = sum.df$Title[order(sum.df$Frequency)])

# Both aesthetics belong inside aes(); previously y=Frequency sat outside it
# and was passed to ggplot() instead of being mapped.
ggplot(data = sum.df, aes(x = Title, y = Frequency)) +
  geom_bar(stat = "identity") +
  coord_flip() #Word Total Graph

Index of Imperial Epithets in the Theodosian Code

Nostra Aeternitas

10.22.3

Mea Aeternitas

12.1.160

Nostra Clementia

1.1.5
1.7.4
1.14.1
2.6.1
2.8.20
2.23.1
5.1.2
5.2.1
5.15.21
5.16.31
6.2.26
6.4.18
6.4.33
6.23.4
6.30.4
6.35.14
7.1.16
7.1.17
7.4.21
7.4.25
7.6.5
7.13.13
7.21.4
8.5.1
8.5.5
8.5.30
8.5.44
8.5.50
8.5.54
8.5.56
8.5.57
8.10.3
9.16.12
9.17.2
9.21.6
9.34.7
9.40.16
9.40.16
9.41.1
9.45.4
10.1.16
10.10.26
10.10.32
10.10.34
10.14.1
10.15.2
11.7.15
11.16.7
11.16.8
11.20.4
11.28.3
11.28.14
11.30.13
11.30.54
11.30.57
11.30.61
11.36.24
12.1.14
12.1.14
12.1.15
12.1.146
12.1.169
12.1.184
12.6.30
12.10.1
12.12.4
12.12.14
13.1.20
13.3.17
14.10.3
14.15.5
14.17.5
14.17.14
15.1.44
15.1.49
15.3.4
15.6.1
16.1.2
16.2.42
16.3.2
16.5.46
16.5.49
16.5.54
16.5.54
16.5.60
16.5.63
16.8.17
16.11.2

Mea Clementia

1.8.2
1.8.3
6.26.17
7.16.2
11.20.5

Nostra Lenitas

1.22.2
10.8.3

Nostra Lenitudo

8.12.6
15.1.5

Nostra Maiestas

6.21.1
6.27.17
6.27.17
8.4.26
8.5.39
11.29.1
11.30.66
11.30.68
13.3.18
14.3.18
15.1.47
16.10.20

Nostra Mansuetudo

1.2.8
1.5.9
1.10.1
1.15.8
1.28.1
3.9.1
4.14.1
6.2.19
6.22.8
6.23.4
6.30.18
6.30.20
7.13.9
8.5.12
8.5.22
8.5.54
8.5.58
8.8.2
8.10.2
9.16.10
9.30.2
10.7.2
10.7.2
10.9.2
10.9.3
10.10.20
10.16.2
11.7.21
11.12.4
11.16.11
11.16.14
11.28.3
11.28.5
11.30.32
11.30.41
11.30.41
12.6.5
12.6.12
12.6.28
12.12.5
12.12.10
12.12.10
12.19.3
13.3.4
13.5.38
13.6.5
14.1.2
14.4.3
14.9.1
15.3.1
15.5.5
15.7.4
15.7.6
15.7.9
16.2.12
16.5.7
16.5.38
16.10.2

Mea Mansuetudo

12.1.121

Nostra Moderatio

6.30.24
8.18.3

Nostrum Numen

1.2.12
1.9.2
2.23.1
2.33.4
5.12.3
5.12.3
6.4.29
6.4.32
6.5.2
6.14.3
6.23.3
6.30.15
7.7.4
7.8.3
8.1.13
8.5.40
8.5.62
9.40.11
11.21.3
11.28.15
11.30.49
12.12.7
15.4.1
15.5.5
16.4.4
16.8.13

Meum Numen

11.1.33

Nostra Perennitas

1.1.5
2.4.4
4.4.5
5.15.18
7.7.4
9.19.3
9.38.8
10.20.10
12.12.9
13.5.12
15.1.31

Mea Perennitas

6.30.21

Nostra Pietas

5.12.3
6.10.1
10.26.1
11.1.34
11.1.36
13.1.21
14.26.2
15.1.37

Mea Pietas

14.16.2

Nostra Serenitas

1.1.2
1.12.5
1.22.2
2.16.2
4.4.3
5.13.2
5.16.31
6.8.1
6.22.3
6.23.1
6.26.13
6.27.8
6.29.3
6.30.17
7.1.17
7.8.10
8.5.14
8.5.22
8.5.32
8.5.45
8.5.48
8.5.56
8.7.16
9.19.3
9.38.6
9.38.9
9.40.7
9.40.20
9.42.14
9.42.19
9.42.20
10.10.11
11.2.5
11.16.20
11.28.4
11.30.47
11.30.56
11.30.64
11.31.9
11.31.9
12.13.6
13.10.8
14.2.1
14.4.8
15.1.11
15.1.26
15.1.42
15.1.51
15.5.5
15.7.6
15.7.6
16.2.37
16.5.12
16.5.14
16.8.22
16.11.3

Mea Serenitas
11.20.5

Nostra Scientia

1.1.5
1.5.1
1.15.2
1.16.6
1.29.1
2.18.1
6.4.21
7.1.12
8.5.25
9.1.1
9.1.13
9.4.1
9.21.1
9.34.3
10.8.3
11.7.16
11.16.8
11.16.8
11.29.2
11.30.1
11.30.1
11.30.9
11.30.18
11.30.18
11.37.1
12.1.1
12.12.3
15.1.2
15.1.2
15.1.30
16.10.1
16.10.15

Nostra Tranquillitas

1.2.10
1.6.4
5.15.18
6.4.31
6.12.1
8.7.16
11.30.31
16.1.4
16.2.15
16.4.1

 

Quantitative Analysis of Imperial Titles in the Theodosian Code

In the Later Roman Empire (4th-6th centuries AD), the Roman emperors frequently referred to themselves (and were referred to) with rhetorical appellations such as Nostra Clementia (“Our Clemency”) and Nostra Tranquillitas (“Our Tranquility”). These titles are ubiquitous in the Late Roman Law codes, and in a number of letters, panegyrics, and other writings addressed to the emperors. I am interested in conducting both “distant” and “close” readings of the usage of these titles, and so am using R for the former.

For this week’s blog post, I have taken the raw text of the Theodosian Code, a fifth century legal compilation of imperial laws, and searched for occurrences of the terms (in all of their inflections) Nostra Clementia, Nostra Mansuetudo, Nostra Tranquillitas, and Nostra Serenitas. The Theodosian Code is divided into 16 “Books”, and so I chunked the text accordingly:

Book Clementia Mansuetudo Tranquillitas Serenitas
1 3 3 1 3
2 3 0 0 1
3 0 0 0 0
4 0 0 0 1
5 4 0 1 2
6 6 3 2 7
7 7 1 0 2
8 9 4 1 6
9 7 1 0 8
10 3 5 0 1
11 8 7 1 8
12 10 6 0 1
13 2 2 0 1
14 4 1 0 2
15 4 5 0 7
16 11 3 3 4

The Theodosian Code contains laws dating from the reign of Constantine (306-337) through the early fifth century. The mass of imperial constitutions from this period was pruned and excerpted by the Code’s compilers, and organized into 16 Books according to subject matter. In some instances, the same law was split up, and its various pieces were placed in different parts of the Code. Therefore, there is not much utility in attempting to chart the changes in word frequency over the Code’s different sections. That being said, some (cautious) conclusions can be made about why the words are more frequent in certain Books of the Code rather than in others. For example, Nostra Clementia sees a spike in Book 8 because it deals with financial privileges and penalties – matters in which the emperor’s clemency was often invoked.

rplot

 

More immediately pertinent may be the sheer total number of occurrences of each title within the Theodosian Code. Of the terms searched, Nostra Clementia is clearly the most common; this is understandable, for the emperor’s clemency was often invoked in his capacity as supreme legislator and judge.

rplot

 

I intend to continue to run searches for other imperial titles, both within the Theodosian Code, and in other texts. Once I have perfected my coding, it will be easy to replicate. The one major issue with which I am still faced, however, is the fact that word order matters little in Latin, and while I have found all of the instances of Nostra Clementia and Clementia Nostra, there are instances within the Code where other words are interposed between Nostra and Clementia. For example:

capture

 

The phrase nostra scilicet super eorum nominibus edocenda clementia, “Our Clemency certainly ought to be informed of their names”, interposes the rest of the clause between nostra and clementia. I still need to figure out how to get R to find these instances and include them in my counts.

 

Code

library(stringr)  # str_replace_all(), str_count()
library(reshape2) # melt(); NOTE(review): assumed provider of melt() - confirm
library(ggplot2)

# Read the raw text of the Theodosian Code, one vector element per line.
CTh.scan <- scan("~/Education/Emory/Coursework/Digital Humanities Methods/Project/Theodosian Code Raw Text.txt",
                 what = "character", sep = "\n")

# Strip punctuation and lower-case every line so the phrase patterns below
# can be written in plain lower case.
CTh.lines <- tolower(str_replace_all(string = CTh.scan, pattern = "[:punct:]", replacement = ""))

# Every line containing "book" is treated as a Book heading.
# NOTE(review): this also matches any body line that merely contains "book".
book.headings <- grep("book", CTh.lines)
if (length(book.headings) == 0) {
  stop("No Book headings found in the Theodosian Code text", call. = FALSE)
}

# A Book runs from the line after its heading to the line before the next
# heading; the last Book runs to the end of the text.
start.lines <- book.headings + 1
end.lines <- c(book.headings[-1] - 1, length(CTh.lines))

# Join the lines from..to into one string; an empty span yields "".
collapse_lines <- function(from, to) {
  if (from > to) {
    return("")
  }
  paste(CTh.lines[from:to], collapse = " ")
}

CTh.df <- data.frame(start = start.lines, end = end.lines, stringsAsFactors = FALSE)
CTh.df$text <- vapply(
  seq_len(nrow(CTh.df)),
  function(i) collapse_lines(CTh.df$start[i], CTh.df$end[i]),
  character(1)
)

CTh.df$Book <- seq_len(nrow(CTh.df))

# Build "possessive noun|noun possessive|..." from parallel vectors of
# inflected forms. Building the pattern in code avoids two defects of the
# hand-typed literals: line breaks inside the string (which put a newline in
# front of an alternative so it could never match the space-joined text) and
# the misspelling "nostae".
adjacent_pattern <- function(possessive, noun) {
  both.orders <- rbind(paste(possessive, noun), paste(noun, possessive))
  paste(as.vector(both.orders), collapse = "|")
}

# Possessive forms for nominative, genitive, dative, accusative, ablative.
nostra.forms <- c("nostra", "nostrae", "nostrae", "nostram", "nostra")

CTh.df$Clementia <- str_count(
  string = CTh.df$text,
  pattern = adjacent_pattern(
    c("nostra", "nostrae", "nostram"),
    c("clementia", "clementiae", "clementiam")
  )
)

CTh.df$Mansuetudo <- str_count(
  string = CTh.df$text,
  pattern = adjacent_pattern(
    nostra.forms,
    c("mansuetudo", "mansuetudinis", "mansuetudini", "mansuetudinem", "mansuetudine")
  )
)

CTh.df$Tranquillitas <- str_count(
  string = CTh.df$text,
  pattern = adjacent_pattern(
    nostra.forms,
    c("tranquillitas", "tranquillitatis", "tranquillitati", "tranquillitatem", "tranquillitate")
  )
)

CTh.df$Serenitas <- str_count(
  string = CTh.df$text,
  pattern = adjacent_pattern(
    nostra.forms,
    c("serenitas", "serenitatis", "serenitati", "serenitatem", "serenitate")
  )
)

# Long format (one row per Book and title) for the line graph; argument names
# are spelled out instead of relying on partial matching.
frequency.long <- melt(
  CTh.df,
  id.vars = "Book",
  measure.vars = c("Clementia", "Mansuetudo", "Tranquillitas", "Serenitas")
)
ggplot(frequency.long, aes(Book, value, colour = variable)) +
  geom_line() +
  ylab("Frequency") #Create Frequency Graph

clementia.sum <- sum(CTh.df$Clementia)
mansuetudo.sum <- sum(CTh.df$Mansuetudo)
tranquillitas.sum <- sum(CTh.df$Tranquillitas)
serenitas.sum <- sum(CTh.df$Serenitas)
Total <- c(clementia.sum, mansuetudo.sum, tranquillitas.sum, serenitas.sum)
Word <- c("Clementia", "Mansuetudo", "Tranquillitas", "Serenitas")
word.sum.df <- cbind.data.frame(Word, Total)
ggplot(data = word.sum.df, aes(x = Word, y = Total)) +
  geom_bar(stat = "identity") #Word Total Graph

 

Mixed Results with the Aeneid

Code A

I must confess to getting a bit of a late start on this week’s blog post (busy week), and as a result I have found myself stuck on a particular line of the chunking code that I have yet to trial-and-error my way through. The 12 book (read: chapter) divisions of the Aeneid are listed as “Liber I, Liber II, Liber III, etc.”, and I can’t quite get the grep function (which I admittedly still do not fully understand) to mark these headings. I believe that the line of code as I have it (bolded below) indicates the phrase “LIBER + (some combination of Roman numerals)”, but even so R comes back with 23 hits instead of the expected 12.

What I had intended to do was to track the occurrences of “virtus” (~manly martial virtuous excellence) and “pius” (~reverent toward the gods and one’s family and duty), both of which are major themes in the Aeneid. Perhaps I will be able to do so once I figure out what’s tripping me up with the grep function. Again, apologies for not coming to Dr. Ravina with this sooner.

library(stringr) # str_count()

Aeneid.lines.scan <- scan(
  "~/Education/Emory/Coursework/Digital Humanities Methods/RStudios Practice/Aeneid Raw Text.txt",
  what = "character", sep = "\n") # Scan Aeneid Raw Text

# The poem runs from the author line to its final verse. Each marker line is
# expected once; if it occurs more often, the first start and last end are used.
start.line <- which(Aeneid.lines.scan == "PUBLI VERGILI MARONIS")
end.line <- which(Aeneid.lines.scan == "vitaque cum gemitu fugit indignata sub umbras.")
if (length(start.line) == 0 || length(end.line) == 0) {
  stop("Could not locate the first or last line of the poem", call. = FALSE)
}
start.line <- start.line[1]
end.line <- end.line[length(end.line)]

poem.lines <- Aeneid.lines.scan[start.line:end.line]

# A heading is the literal word LIBER followed by a Roman numeral. The former
# pattern "^[LIBER I|V|X]*$" was a single character class, so it matched any
# line made up only of those letters, spaces and "|".
book.headings <- grep("^[[:space:]]*LIBER[[:space:]]+[IVX]+[[:space:]]*$", poem.lines)
if (length(book.headings) == 0) {
  stop("No LIBER headings found in the poem", call. = FALSE)
}
if (length(book.headings) != 12) {
  warning("Expected 12 book headings, found ", length(book.headings), call. = FALSE)
}
start.lines <- book.headings + 1

# NOTE(review): the offset of 3 presumably skips non-verse lines that precede
# each heading - confirm against the raw text.
end.lines <- c(book.headings[-1] - 3, length(poem.lines))

Aeneid.df <- data.frame(start = start.lines, end = end.lines, stringsAsFactors = FALSE)
Aeneid.df$text <- vapply(
  seq_len(nrow(Aeneid.df)),
  function(i) paste(poem.lines[Aeneid.df$start[i]:Aeneid.df$end[i]], collapse = " "),
  character(1)
)
View(Aeneid.df)

# Book numbers follow the number of headings found rather than a fixed 1..12.
Aeneid.df$book <- seq_len(nrow(Aeneid.df))

# Word boundaries replace \W...\W so that a match at the very start or end of
# a book, or two matches sharing one separator, are counted too.
Aeneid.df$virtus <-
  str_count(string = Aeneid.df$text, pattern = "\\b[Vv]irtus\\b")
plot(Aeneid.df$book, Aeneid.df$virtus)

Aeneid.df$pius <-
  str_count(string = Aeneid.df$text, pattern = "\\b[Pp]ius\\b")
plot(Aeneid.df$book, Aeneid.df$pius)

Code B

I had more success dealing with the KWIC analysis (although I should point out that in both this and the previous set of coding, I am still hampered by my ignorance of stemming and NLTK for Latin). Here I looked at the context in which one found the word “pius” with either “Aeneas” (to whom the epithet is often given) or “At” (meaning “but”, and something that I noticed appeared in a number of lines with “pius”).

library(stringr) # str_split(), str_count()

# Whole poem as one string.
poem.total <- paste(poem.lines, collapse = " ")
length(poem.total)

nchar(poem.total)

# Split on non-word characters and drop the empty tokens this leaves behind.
poem.words <- unlist(str_split(poem.total, "\\W"))
length(poem.words)

poem.words <- poem.words[poem.words != ""]
length(poem.words)

# Locate every "pius" regardless of capitalisation. The words themselves keep
# their original case because the context search below is case-sensitive.
# (Previously the lower-cased text was stored under a misspelled name and
# never used, so a capitalised "Pius" was missed.)
locations.kwic <- which(tolower(poem.words) == "pius")

# Window of five words either side, clamped to the valid index range 1..n.
start.kwic <- pmax(locations.kwic - 5, 1)
end.kwic <- pmin(locations.kwic + 5, length(poem.words))

kwic.text <- vapply(
  seq_along(locations.kwic),
  function(i) paste(poem.words[start.kwic[i]:end.kwic[i]], collapse = " "),
  character(1)
)
KWIC.df <- data.frame(start = start.kwic, end = end.kwic, text = kwic.text,
                      stringsAsFactors = FALSE)

# Inspect all contexts (previously only the last window was shown, through a
# lower-case view() call).
View(KWIC.df)

index.no <- locations.kwic
# Whole-word matches only, so "At" is not counted inside longer words.
context.count <- str_count(KWIC.df$text, "\\b(Aeneas|At)\\b")
plot(index.no, context.count)

rplot

Text Mining I – The Latin Text of Vergil’s Aeneid

I began by copying the Latin text of Vergil’s Aeneid from Project Gutenberg and pasting it to a word document. I had R pull the raw text from that document, and then I converted it into a dataframe. The Aeneid is divided into twelve “books” (closer in length to our “chapters”), so I had to scroll through the dataframe and note the line numbers of the beginning and ending of each book, so that I could direct R to leave out the subheadings between them. The rest proceeded much as it did with Shakespeare’s Sonnets last week, and fortunately punctuation in Latin is a non-ancient convention, so it was easy for me to eliminate that from the text without it affecting the words.

The only major problem that I ran into (and still need to figure out how to resolve) is the fact that Latin is a highly inflected language. Depending on how words are being used in a sentence, their endings change, and this complicates any attempts at counting word frequencies (e.g. haec is one of the most common words in the poem, but its other forms, hic, hoc, hanc, huius, huic, etc, were counted separately). That being said, the commonest words that showed up in my (imperfect) dataframe were conjunctions, prepositions, and particles (et, meaning “and”, is by far the most common word in the Aeneid), which generally do not change form. Finally, the fact that I used a Latin text (which is what I would do when using R for my actual research) meant that it was more difficult to compare and contrast the Aeneid’s word frequencies with those of Shakespeare, but they did have a few words in common: “in”, “me”, and “o”, which have roughly the same uses in both Latin and English (and English of course gets those words from Latin).

Code

library(stringr) # str_split(), str_replace_all()

Aeneid.lines.scan <- scan("~/Education/Emory/Coursework/Digital Humanities Methods/RStudios Practice/Aeneid Raw Text.txt", what = "character", sep = "\n") # Scan Aeneid Raw Text

Aeneid.lines.df <- data.frame(Aeneid.lines.scan, stringsAsFactors = FALSE) # Put into a dataframe

# Keep only the rows in these twelve hand-entered ranges; the rows between
# them are dropped as non-text lines.
Aeneid.lines <- Aeneid.lines.df[c(27:782, 786:1589, 1593:2310, 2314:3018, 3022:3892, 3896:4796, 4800:5616, 5620:6350, 6354:7171, 7175:8082, 8086:9000, 9004:9955), ] # Eliminate non-text lines

Aeneid.string <- paste(Aeneid.lines, collapse = " ")

Aeneid.words <- unlist(str_split(string = Aeneid.string, pattern = " "))

# Raw token table, taken before empty tokens are removed and before cleaning.
Aeneid.freq.df <- data.frame(table(Aeneid.words))

Aeneid.words <- Aeneid.words[Aeneid.words != ""] # Remove white space

Aeneid.words.df <- data.frame(words = Aeneid.words, stringsAsFactors = FALSE)

Aeneid.words.df$lower <- tolower(Aeneid.words.df$words)

Aeneid.words.df$clean_text <- str_replace_all(Aeneid.words.df$words, "[:punct:]", "") # remove punctuation

Aeneid.words.df$cleaned <- str_replace_all(Aeneid.words.df$lower, "[:punct:]", "") # remove punctuation

# Tokens that consisted only of punctuation are now empty strings; leave them
# out so that "" is not tabulated as a word.
Aeneid.clean.tbl.df <- data.frame(
  table(Aeneid.words.df$cleaned[Aeneid.words.df$cleaned != ""])
)

Aeneid.cleaned.tbl.ord.df <- Aeneid.clean.tbl.df[order(-Aeneid.clean.tbl.df$Freq), ]

colnames(Aeneid.cleaned.tbl.ord.df)[1] <- "Words"
# Store the words as character, like the tables loaded below, so the set
# comparisons work on plain strings rather than factor levels.
Aeneid.cleaned.tbl.ord.df$Words <- as.character(Aeneid.cleaned.tbl.ord.df$Words)

write.table(Aeneid.cleaned.tbl.ord.df, "~/Education/Emory/Coursework/Digital Humanities Methods/RStudios Practice/Aeneid.tbl.ord.df.txt",
            sep = "\t") #Save Cleaned tabled ordered Aeneid

SONNETS.cleaned.tbl.ord.df <- read.table("~/Education/Emory/Coursework/Digital Humanities Methods/RStudios Practice/SONNETS.tbl.ord.df.txt",
                                         sep = "\t", stringsAsFactors = FALSE) #Load Cleaned tabled ordered Sonnets

HAMLET.cleaned.tbl.ord.df <- read.table("~/Education/Emory/Coursework/Digital Humanities Methods/RStudios Practice/HAMLET.tbl.ord.df.txt",
                                        sep = "\t", stringsAsFactors = FALSE) #Load Cleaned tabled ordered Hamlet

# Overlap and differences between the 50 most frequent words of each text.
intersect(Aeneid.cleaned.tbl.ord.df$Words[1:50], HAMLET.cleaned.tbl.ord.df$Words[1:50])

setdiff(Aeneid.cleaned.tbl.ord.df$Words[1:50], HAMLET.cleaned.tbl.ord.df$Words[1:50])

setdiff(HAMLET.cleaned.tbl.ord.df$Words[1:50], Aeneid.cleaned.tbl.ord.df$Words[1:50])

# Rows of the Aeneid top 20 that are shared / not shared with Hamlet's top 20.
Aeneid.cleaned.tbl.ord.df[which(Aeneid.cleaned.tbl.ord.df$Words[1:20]
                                %in% HAMLET.cleaned.tbl.ord.df$Words[1:20]), ]

Aeneid.cleaned.tbl.ord.df[which(!Aeneid.cleaned.tbl.ord.df$Words[1:20]
                                %in% HAMLET.cleaned.tbl.ord.df$Words[1:20]), ]